# Test that fromisoformat() fails on invalid values
bad_strs = [
'', # Empty string
+ '\ud800', # bpo-34454: Surrogate code point
'009-03-04', # Not 10 characters
'123456789', # Not a date
'200a-12-04', # Invalid character in year
'2009-01-32', # Invalid day
'2009-02-29', # Invalid leap day
'20090228', # Valid ISO8601 output not from isoformat()
+ '2009\ud80002\ud80028', # Separators are surrogate codepoints
]
for bad_str in bad_strs:
' ', 'T', '\u007f', # 1-bit widths
'\u0080', 'ʁ', # 2-bit widths
'ᛇ', '時', # 3-bit widths
- '🐍' # 4-bit widths
+ '🐍', # 4-bit widths
+ '\ud800', # bpo-34454: Surrogate code point
]
for sep in separators:
# Test that fromisoformat() fails on invalid values
bad_strs = [
'', # Empty string
+ '\ud800', # bpo-34454: Surrogate code point
'2009.04-19T03', # Wrong first separator
'2009-04.19T03', # Wrong second separator
'2009-04-19T0a', # Invalid hours
'2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset
'2009-04-19T03:15:45.123456-24:30', # Invalid negative offset
'2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators
+ '2009-04\ud80010T12:15', # Surrogate char in date
+ '2009-04-10T12\ud80015', # Surrogate char in time
'2009-04-19T1', # Incomplete hours
'2009-04-19T12:3', # Incomplete minutes
'2009-04-19T12:30:4', # Incomplete seconds
def test_fromisoformat_fails(self):
bad_strs = [
'', # Empty string
+ '12\ud80000', # Invalid separator - surrogate char
'12:', # Ends on a separator
'12:30:', # Ends on a separator
'12:30:15.', # Ends on a separator
Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
+ if (dt_ptr == NULL) {
+ goto invalid_string_error;
+ }
int year = 0, month = 0, day = 0;
}
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s",
- dt_ptr);
- return NULL;
+ goto invalid_string_error;
}
return new_date_subclass_ex(year, month, day, cls);
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R",
+ dtstr);
+ return NULL;
}
Py_ssize_t len;
const char *p = PyUnicode_AsUTF8AndSize(tstr, &len);
+ if (p == NULL) {
+ goto invalid_string_error;
+ }
+
int hour = 0, minute = 0, second = 0, microsecond = 0;
int tzoffset, tzimicrosecond = 0;
int rv = parse_isoformat_time(p, len,
&tzoffset, &tzimicrosecond);
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p);
- return NULL;
+ goto invalid_string_error;
}
PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset,
Py_DECREF(tzinfo);
return t;
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", tstr);
+ return NULL;
}
return result;
}
+/* Prepare `dtstr` so that datetime_fromisoformat can encode it as UTF-8.
+ *
+ * `fromisoformat` allows surrogate characters in exactly one position, the
+ * date/time separator (index 10).  To let the caller make the simplifying
+ * assumption that every valid string can be encoded in UTF-8, a surrogate
+ * separator is replaced with `T` in a copy of the string.
+ *
+ * Returns:
+ *   - `dtstr` itself (no new reference is created) with *needs_decref == 0
+ *     when the string has no character at index 10 or that character is not
+ *     a surrogate;
+ *   - a newly created string with *needs_decref == 1 when the separator was
+ *     replaced; the caller must Py_DECREF it when done;
+ *   - NULL with *needs_decref == 0 on failure (the failing C-API call is
+ *     expected to have set the exception).
+ */
+static PyObject *
+_sanitize_isoformat_str(PyObject *dtstr, int *needs_decref) {
+    // Clear the flag first so every early return leaves it well-defined.
+    *needs_decref = 0;
+
+    Py_ssize_t len = PyUnicode_GetLength(dtstr);
+    if (len < 0) {
+        // PyUnicode_GetLength() signals failure with -1; without this check
+        // the `len <= 10` test below would hand `dtstr` back as a success
+        // while an exception is still pending.
+        return NULL;
+    }
+
+    if (len <= 10 || !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) {
+        // Nothing to sanitize: return the caller's own object unchanged.
+        return dtstr;
+    }
+
+    // Allocate a string of the same length and character width as the input.
+    PyObject *str_out = PyUnicode_New(len, PyUnicode_MAX_CHAR_VALUE(dtstr));
+    if (str_out == NULL) {
+        return NULL;
+    }
+
+    // Copy everything, then overwrite only the surrogate separator.
+    if (PyUnicode_CopyCharacters(str_out, 0, dtstr, 0, len) == -1 ||
+            PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T') < 0) {
+        Py_DECREF(str_out);
+        return NULL;
+    }
+
+    *needs_decref = 1;
+    return str_out;
+}
+
static PyObject *
datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
assert(dtstr != NULL);
return NULL;
}
+ int needs_decref = 0;
+ dtstr = _sanitize_isoformat_str(dtstr, &needs_decref);
+ if (dtstr == NULL) {
+ goto error;
+ }
+
Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
- const char * p = dt_ptr;
+
+ if (dt_ptr == NULL) {
+ goto invalid_string_error;
+ }
+
+ const char *p = dt_ptr;
int year = 0, month = 0, day = 0;
int hour = 0, minute = 0, second = 0, microsecond = 0;
&tzoffset, &tzusec);
}
if (rv < 0) {
- PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr);
- return NULL;
+ goto invalid_string_error;
}
PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec);
if (tzinfo == NULL) {
- return NULL;
+ goto error;
}
PyObject *dt = new_datetime_subclass_ex(year, month, day, hour, minute,
second, microsecond, tzinfo, cls);
Py_DECREF(tzinfo);
+ if (needs_decref) {
+ Py_DECREF(dtstr);
+ }
return dt;
+
+invalid_string_error:
+ PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
+
+error:
+ if (needs_decref) {
+ Py_DECREF(dtstr);
+ }
+
+ return NULL;
}