A new implementation of astimezone() that does what we agreed on in all

author Tim Peters <tim.peters@gmail.com>

Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)

committer Tim Peters <tim.peters@gmail.com>

Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)
author Tim Peters <tim.peters@gmail.com>
Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)
committer Tim Peters <tim.peters@gmail.com>
Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)
diff --git a/Doc/lib/libdatetime.tex b/Doc/lib/libdatetime.tex

index d251e0f40e686b32c70e01f48251847b728a12c5..5504c2209088f001e2aee1d44119ae0c6a79a195 100644 (file)
--- a/Doc/lib/libdatetime.tex
+++ b/Doc/lib/libdatetime.tex
@@ -924,11 +924,11 @@ implement all of them.
        \code{tz.utcoffset(dt) - tz.dst(dt)}
  
    must return the same result for every \class{datetimetz} \var{dt}
-  in a given year with \code{dt.tzinfo==tz}  For sane \class{tzinfo}
-  subclasses, this expression yields the time zone's "standard offset"
-  within the year, which should be the same across all days in the year.
-  The implementation of \method{datetimetz.astimezone()} relies on this,
-  but cannot detect violations; it's the programmer's responsibility to
+  with \code{dt.tzinfo==tz}.  For sane \class{tzinfo} subclasses, this
+  expression yields the time zone's "standard offset", which should not
+  depend on the date or the time, but only on geographic location.  The
+  implementation of \method{datetimetz.astimezone()} relies on this, but
+  cannot detect violations; it's the programmer's responsibility to
    ensure it.
  
  \begin{methoddesc}{tzname}{self, dt}
@@ -970,6 +970,50 @@ Example \class{tzinfo} classes:
  
  \verbatiminput{tzinfo-examples.py}
  
+Note that there are unavoidable subtleties twice per year in a tzinfo
+subclass accounting for both standard and daylight time, at the DST
+transition points.  For concreteness, consider US Eastern (UTC -0500),
+where EDT begins the minute after 1:59 (EST) on the first Sunday in
+April, and ends the minute after 1:59 (EDT) on the last Sunday in October:
+
+\begin{verbatim}
+    UTC   3:MM  4:MM  5:MM  6:MM  7:MM  8:MM
+    EST  22:MM 23:MM  0:MM  1:MM  2:MM  3:MM
+    EDT  23:MM  0:MM  1:MM  2:MM  3:MM  4:MM
+
+  start  22:MM 23:MM  0:MM  1:MM  3:MM  4:MM
+
+    end  23:MM  0:MM  1:MM  1:MM  2:MM  3:MM
+\end{verbatim}
+
+When DST starts (the "start" line), the local wall clock leaps from 1:59
+to 3:00.  A wall time of the form 2:MM doesn't really make sense on that
+day, so astimezone(Eastern) won't deliver a result with hour=2 on the
+day DST begins.  How an Eastern class chooses to interpret 2:MM on
+that day is its business.  The example Eastern class above chose to
+consider it as a time in EDT, simply because it "looks like it's
+after 2:00", and so synonymous with the EST 1:MM times on that day.
+Your Eastern class may wish, for example, to raise an exception instead
+when it sees a 2:MM time on the day DST begins.
+
+When DST ends (the "end" line), there's a potentially worse problem:
+there's an hour that can't be spelled at all in local wall time, the
+hour beginning at the moment DST ends.  In this example, that's times of
+the form 6:MM UTC on the day daylight time ends.  The local wall clock
+leaps from 1:59 (daylight time) back to 1:00 (standard time) again.
+1:MM is taken as daylight time (it's "before 2:00"), so maps to 5:MM UTC.
+2:MM is taken as standard time (it's "after 2:00"), so maps to 7:MM UTC.
+There is no local time that maps to 6:MM UTC on this day.
+
+Just as the wall clock does, astimezone(Eastern) maps both UTC hours 5:MM
+and 6:MM to Eastern hour 1:MM on this day.  However, this result is
+ambiguous (there's no way for Eastern to know which repetition of 1:MM
+is intended).  Applications that can't bear such ambiguity even one hour
+per year should avoid using hybrid tzinfo classes; there are no
+ambiguities when using UTC, or any other fixed-offset tzinfo subclass
+(such as a class representing only EST (fixed offset -5 hours), or only
+EDT (fixed offset -4 hours)).
+
  
  \subsection{\class{timetz} Objects \label{datetime-timetz}}
  
diff --git a/Doc/lib/tzinfo-examples.py b/Doc/lib/tzinfo-examples.py

index 92f298c3c93c86d8efcd3819127f7bc7c84cc081..70a49667d4e005bd3ce1dc63a5d3fed23f3f7ba5 100644 (file)
--- a/Doc/lib/tzinfo-examples.py
+++ b/Doc/lib/tzinfo-examples.py
@@ -1,6 +1,7 @@
-from datetime import tzinfo, timedelta
+from datetime import tzinfo, timedelta, datetime
  
  ZERO = timedelta(0)
+HOUR = timedelta(hours=1)
  
  # A UTC class.
  
@@ -76,3 +77,63 @@ class LocalTimezone(tzinfo):
          return tt.tm_isdst > 0
  
  Local = LocalTimezone()
+
+
+# A complete implementation of current DST rules for major US time zones.
+
+def first_sunday_on_or_after(dt):
+    days_to_go = 6 - dt.weekday()
+    if days_to_go:
+        dt += timedelta(days_to_go)
+    return dt
+
+# In the US, DST starts at 2am (standard time) on the first Sunday in April.
+DSTSTART = datetime(1, 4, 1, 2)
+# and ends at 2am (DST time; 1am standard time) on the last Sunday of Oct,
+# which is the first Sunday on or after Oct 25.
+DSTEND = datetime(1, 10, 25, 2)
+
+class USTimeZone(tzinfo):
+
+    def __init__(self, hours, reprname, stdname, dstname):
+        self.stdoffset = timedelta(hours=hours)
+        self.reprname = reprname
+        self.stdname = stdname
+        self.dstname = dstname
+
+    def __repr__(self):
+        return self.reprname
+
+    def tzname(self, dt):
+        if self.dst(dt):
+            return self.dstname
+        else:
+            return self.stdname
+
+    def utcoffset(self, dt):
+        return self.stdoffset + self.dst(dt)
+
+    def dst(self, dt):
+        if dt is None or dt.tzinfo is None:
+            # An exception may be sensible here, in one or both cases.
+            # It depends on how you want to treat them.  The astimezone()
+            # implementation always passes a datetimetz with
+            # dt.tzinfo == self.
+            return ZERO
+        assert dt.tzinfo is self
+
+        # Find first Sunday in April & the last in October.
+        start = first_sunday_on_or_after(DSTSTART.replace(year=dt.year))
+        end = first_sunday_on_or_after(DSTEND.replace(year=dt.year))
+
+        # Can't compare naive to aware objects, so strip the timezone from
+        # dt first.
+        if start <= dt.replace(tzinfo=None) < end:
+            return HOUR
+        else:
+            return ZERO
+
+Eastern  = USTimeZone(-5, "Eastern",  "EST", "EDT")
+Central  = USTimeZone(-6, "Central",  "CST", "CDT")
+Mountain = USTimeZone(-7, "Mountain", "MST", "MDT")
+Pacific  = USTimeZone(-8, "Pacific",  "PST", "PDT")
diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py

index c9f76743219881242ff5644b7c6e828f6f8138a9..c3779607e5777b567df33a53d6b6969ac3613589 100644 (file)
--- a/Lib/test/test_datetime.py
+++ b/Lib/test/test_datetime.py
@@ -2592,7 +2592,7 @@ Pacific  = USTimeZone(-8, "Pacific",  "PST", "PDT")
  utc_real = FixedOffset(0, "UTC", 0)
  # For better test coverage, we want another flavor of UTC that's west of
  # the Eastern and Pacific timezones.
-utc_fake = FixedOffset(-12, "UTCfake", 0)
+utc_fake = FixedOffset(-12*60, "UTCfake", 0)
  
  class TestTimezoneConversions(unittest.TestCase):
      # The DST switch times for 2002, in local time.
@@ -2643,25 +2643,17 @@ class TestTimezoneConversions(unittest.TestCase):
          # 1:MM:SS is taken to be daylight time, and 2:MM:SS as
          # standard time.  The hour 1:MM:SS standard time ==
          # 2:MM:SS daylight time can't be expressed in local time.
+        # Nevertheless, we want conversion back from UTC to mimic
+        # the local clock's "repeat an hour" behavior.
          nexthour_utc = asutc + HOUR
+        nexthour_tz = nexthour_utc.astimezone(tz)
          if dt.date() == dstoff.date() and dt.hour == 1:
              # We're in the hour before DST ends.  The hour after
-            # is ineffable.
-            # For concreteness, picture Eastern.  during is of
-            # the form 1:MM:SS, it's daylight time, so that's
-            # 5:MM:SS UTC.  Adding an hour gives 6:MM:SS UTC.
-            # Daylight time ended at 2+4 == 6:00:00 UTC, so
-            # 6:MM:SS is (correctly) taken to be standard time.
-            # But standard time is at offset -5, and that maps
-            # right back to the 1:MM:SS Eastern we started with.
-            # That's correct, too, *if* 1:MM:SS were taken as
-            # being standard time.  But it's not -- on this day
-            # it's taken as daylight time.
-            self.assertRaises(ValueError,
-                              nexthour_utc.astimezone, tz)
+            # is ineffable.  We want the conversion back to repeat 1:MM.
+            expected_diff = ZERO
          else:
-            nexthour_tz = nexthour_utc.astimezone(utc)
-            self.assertEqual(nexthour_tz - dt, HOUR)
+            expected_diff = HOUR
+        self.assertEqual(nexthour_tz - dt, expected_diff)
  
      # Check a time that's outside DST.
      def checkoutside(self, dt, tz, utc):
@@ -2739,6 +2731,31 @@ class TestTimezoneConversions(unittest.TestCase):
          got = sixutc.astimezone(Eastern).astimezone(None)
          self.assertEqual(expected, got)
  
+        # Now on the day DST ends, we want "repeat an hour" behavior.
+        #  UTC  4:MM  5:MM  6:MM  7:MM  checking these
+        #  EST 23:MM  0:MM  1:MM  2:MM
+        #  EDT  0:MM  1:MM  2:MM  3:MM
+        # wall  0:MM  1:MM  1:MM  2:MM  against these
+        for utc in utc_real, utc_fake:
+            for tz in Eastern, Pacific:
+                first_std_hour = self.dstoff - timedelta(hours=3) # 23:MM
+                # Convert that to UTC.
+                first_std_hour -= tz.utcoffset(None)
+                # Adjust for possibly fake UTC.
+                asutc = first_std_hour + utc.utcoffset(None)
+                # First UTC hour to convert; this is 4:00 when utc=utc_real &
+                # tz=Eastern.
+                asutcbase = asutc.replace(tzinfo=utc)
+                for tzhour in (0, 1, 1, 2):
+                    expectedbase = self.dstoff.replace(hour=tzhour)
+                    for minute in 0, 30, 59:
+                        expected = expectedbase.replace(minute=minute)
+                        asutc = asutcbase.replace(minute=minute)
+                        astz = asutc.astimezone(tz)
+                        self.assertEqual(astz.replace(tzinfo=None), expected)
+                    asutcbase += HOUR
+
+
      def test_bogus_dst(self):
          class ok(tzinfo):
              def utcoffset(self, dt): return HOUR
diff --git a/Misc/NEWS b/Misc/NEWS

index 731095ee7976296773fd93ad1de4ded9fd2bbc28..89c7683443996dcc4db107d277b44e4fe72422fc 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,11 +26,15 @@ Extension modules
  - datetime changes:
  
    today() and now() now round system timestamps to the closest
-  microsecond <http://www.python.org/sf/661086>.
+  microsecond <http://www.python.org/sf/661086>.  This repairs an
+  irritation most likely seen on Windows systems.
  
    In dt.asdatetime(tz), if tz.utcoffset(dt) returns a duration,
    ValueError is raised if tz.dst(dt) returns None (2.3a1 treated it
-  as 0 instead).
+  as 0 instead, but a tzinfo subclass wishing to participate in
+  time zone conversion has to take a stand on whether it supports
+  DST; if you don't care about DST, then code dst() to return 0 minutes,
+  meaning that DST is never in effect).
  
    The tzinfo methods utcoffset() and dst() must return a timedelta object
    (or None) now.  In 2.3a1 they could also return an int or long, but that
@@ -40,6 +44,12 @@ Extension modules
    The example tzinfo class for local time had a bug.  It was replaced
    by a later example coded by Guido.
  
+  datetimetz.astimezone(tz) no longer raises an exception when the
+  input datetime has no UTC equivalent in tz.  For typical "hybrid" time
+  zones (a single tzinfo subclass modeling both standard and daylight
+  time), this case can arise one hour per year, at the hour daylight time
+  ends.  See new docs for details.
+
  Library
  -------
  
diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c

index c88251e4dd3211d3824f4dc9c3e97ab78f2576fd..6e283365cbf21d75983516cb5d4205f4360c5198 100644 (file)
--- a/Modules/datetimemodule.c
+++ b/Modules/datetimemodule.c
@@ -4754,7 +4754,7 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args,
  
         PyObject *result;
         PyObject *temp;
-       int selfoff, resoff, resdst, total_added_to_result;
+       int selfoff, resoff, dst1, dst2;
         int none;
         int delta;
  
@@ -4792,19 +4792,24 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args,
  
         /* See the long comment block at the end of this file for an
          * explanation of this algorithm.  That it always works requires a
-        * pretty intricate proof.
+        * pretty intricate proof.  There are many equivalent ways to code
+        * up the proof as an algorithm.  This way favors calling dst() over
+        * calling utcoffset(), because "the usual" utcoffset() calls dst()
+        * itself, and calling the latter instead saves a Python-level
+        * function call.  This way of coding it also follows the proof
+        * closely, w/ x=self, y=result, z=result, and z'=temp.
          */
-       resdst = call_dst(tzinfo, result, &none);
-       if (resdst == -1 && PyErr_Occurred())
+       dst1 = call_dst(tzinfo, result, &none);
+       if (dst1 == -1 && PyErr_Occurred())
                 goto Fail;
         if (none) {
                 PyErr_SetString(PyExc_ValueError, "astimezone(): utcoffset() "
                 "returned a duration but dst() returned None");
                 goto Fail;
         }
-       total_added_to_result = resoff - resdst - selfoff;
-       if (total_added_to_result != 0) {
-               mm += total_added_to_result;
+       delta = resoff - dst1 - selfoff;
+       if (delta) {
+               mm += delta;
                 if ((mm < 0 || mm >= 60) &&
                     normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0)
                         goto Fail;
@@ -4814,58 +4819,47 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args,
                 Py_DECREF(result);
                 result = temp;
  
-               resoff = call_utcoffset(tzinfo, result, &none);
-               if (resoff == -1 && PyErr_Occurred())
+               dst1 = call_dst(tzinfo, result, &none);
+               if (dst1 == -1 && PyErr_Occurred())
                         goto Fail;
                 if (none)
                         goto Inconsistent;
         }
-
-       /* The distance now from self to result is
-        * self - result == naive(self) - selfoff - (naive(result) - resoff) ==
-        * naive(self) - selfoff -
-        *             ((naive(self) + total_added_to_result - resoff) ==
-        * - selfoff - total_added_to_result + resoff.
-        */
-       delta = resoff - selfoff - total_added_to_result;
-
-       /* Now self and result are the same UTC time iff delta is 0.
-        * If it is 0, we're done, although that takes some proving.
-        */
-       if (delta == 0)
+       if (dst1 == 0)
                 return result;
  
-       total_added_to_result += delta;
-       mm += delta;
+       mm += dst1;
         if ((mm < 0 || mm >= 60) &&
             normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0)
                 goto Fail;
-
         temp = new_datetimetz(y, m, d, hh, mm, ss, us, tzinfo);
         if (temp == NULL)
                 goto Fail;
-       Py_DECREF(result);
-       result = temp;
  
-       resoff = call_utcoffset(tzinfo, result, &none);
-       if (resoff == -1 && PyErr_Occurred())
+       dst2 = call_dst(tzinfo, temp, &none);
+       if (dst2 == -1 && PyErr_Occurred()) {
+               Py_DECREF(temp);
                 goto Fail;
-       if (none)
+       }
+       if (none) {
+               Py_DECREF(temp);
                 goto Inconsistent;
+       }
  
-       if (resoff - selfoff == total_added_to_result)
-               /* self and result are the same UTC time */
-               return result;
-
-        /* Else there's no way to spell self in zone tzinfo. */
-        PyErr_SetString(PyExc_ValueError, "astimezone(): the source "
-                       "datetimetz can't be expressed in the target "
-                       "timezone's local time");
-        goto Fail;
+       if (dst1 == dst2) {
+               /* The normal case:  we want temp, not result. */
+               Py_DECREF(result);
+               result = temp;
+       }
+       else {
+               /* The "unspellable hour" at the end of DST. */
+               Py_DECREF(temp);
+       }
+       return result;
  
  Inconsistent:
-       PyErr_SetString(PyExc_ValueError, "astimezone(): tz.utcoffset() "
-                       "gave inconsistent results; cannot convert");
+       PyErr_SetString(PyExc_ValueError, "astimezone(): tz.dst() gave "
+                       "inconsistent results; cannot convert");
  
         /* fall thru to failure */
  Fail:
author	Tim Peters <tim.peters@gmail.com>
	Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Sat, 4 Jan 2003 06:03:15 +0000 (06:03 +0000)
Doc/lib/libdatetime.tex		patch \| blob \| history
Doc/lib/tzinfo-examples.py		patch \| blob \| history
Lib/test/test_datetime.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/datetimemodule.c		patch \| blob \| history