|
|
4898f3 |
From cbf5299a065e20a5b129ad5eed6953262ce54f37 Mon Sep 17 00:00:00 2001
|
|
|
4898f3 |
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
|
|
4898f3 |
Date: Wed, 21 Feb 2024 06:55:19 -0500
|
|
|
4898f3 |
Subject: [PATCH 6/6] Fix accidental loss-of-precision for to_datetime(str,
|
|
|
4898f3 |
unit=...)
|
|
|
4898f3 |
|
|
|
4898f3 |
In pandas 1.5.3, the `float(val)` cast was written inline in the
|
|
|
4898f3 |
`cast_from_unit` call in `array_with_unit_to_datetime`. This caused the
|
|
|
4898f3 |
intermediate (unnamed) value to be a Python float.
|
|
|
4898f3 |
|
|
|
4898f3 |
In #50301, a temporary variable was added to avoid multiple casts,
|
|
|
4898f3 |
but with explicit type `cdef float`, which defines a _Cython_ float.
|
|
|
4898f3 |
This type is 32-bit, which causes a loss of precision and a regression in
|
|
|
4898f3 |
parsing relative to 1.5.3.
|
|
|
4898f3 |
|
|
|
4898f3 |
So widen the explicit type of the temporary `fval` variable to (64-bit)
|
|
|
4898f3 |
`double`, which will not lose precision.
|
|
|
4898f3 |
|
|
|
4898f3 |
Fixes #57051
|
|
|
4898f3 |
|
|
|
4898f3 |
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
|
|
4898f3 |
---
|
|
|
4898f3 |
pandas/_libs/tslib.pyx | 2 +-
|
|
|
4898f3 |
pandas/tests/tools/test_to_datetime.py | 8 ++++++++
|
|
|
4898f3 |
2 files changed, 9 insertions(+), 1 deletion(-)
|
|
|
4898f3 |
|
|
|
4898f3 |
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
|
|
|
4898f3 |
index 017fdc4bc8..dd23c2f27c 100644
|
|
|
4898f3 |
--- a/pandas/_libs/tslib.pyx
|
|
|
4898f3 |
+++ b/pandas/_libs/tslib.pyx
|
|
|
4898f3 |
@@ -277,7 +277,7 @@ def array_with_unit_to_datetime(
|
|
|
4898f3 |
bint is_raise = errors == "raise"
|
|
|
4898f3 |
ndarray[int64_t] iresult
|
|
|
4898f3 |
tzinfo tz = None
|
|
|
4898f3 |
- float fval
|
|
|
4898f3 |
+ double fval
|
|
|
4898f3 |
|
|
|
4898f3 |
assert is_ignore or is_coerce or is_raise
|
|
|
4898f3 |
|
|
|
4898f3 |
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
|
|
|
4898f3 |
index 6791ac0340..a4194dcff2 100644
|
|
|
4898f3 |
--- a/pandas/tests/tools/test_to_datetime.py
|
|
|
4898f3 |
+++ b/pandas/tests/tools/test_to_datetime.py
|
|
|
4898f3 |
@@ -1912,6 +1912,14 @@ class TestToDatetimeUnit:
|
|
|
4898f3 |
with pytest.raises(ValueError, match=msg):
|
|
|
4898f3 |
to_datetime([1], unit="D", format="%Y%m%d", cache=cache)
|
|
|
4898f3 |
|
|
|
4898f3 |
+ def test_unit_str(self, cache):
|
|
|
4898f3 |
+ # GH 57051
|
|
|
4898f3 |
+ # Test that strs aren't dropping precision to 32-bit accidentally.
|
|
|
4898f3 |
+ with tm.assert_produces_warning(FutureWarning):
|
|
|
4898f3 |
+ res = pd.to_datetime(["1704660000"], unit="s", origin="unix")
|
|
|
4898f3 |
+ expected = pd.to_datetime([1704660000], unit="s", origin="unix")
|
|
|
4898f3 |
+ tm.assert_index_equal(res, expected)
|
|
|
4898f3 |
+
|
|
|
4898f3 |
def test_unit_array_mixed_nans(self, cache):
|
|
|
4898f3 |
values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
|
|
|
4898f3 |
result = to_datetime(values, unit="D", errors="ignore", cache=cache)
|
|
|
4898f3 |
--
|
|
|
4898f3 |
2.43.0
|
|
|
4898f3 |
|