Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ Sparse

ExtensionArray
^^^^^^^^^^^^^^
- Fixed bug in :meth:`Series.apply` and :meth:`Series.map` where nullable integer dtypes were converted to float, causing precision loss for large integers; now the nullable dtype will be preserved (:issue:`63903`).
-
-

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,7 +1688,12 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
    """
    Apply ``mapper`` to the values of this masked array.

    Parameters
    ----------
    mapper : object
        Mapping correspondence, forwarded unchanged to ``map_array``.
    na_action : {None, "ignore"}, default None
        Forwarded unchanged to ``map_array``.

    Returns
    -------
    The mapped values after being passed through
    ``self._cast_pointwise_result``.
    """
    # Map over an object array that holds ``NA`` for missing entries instead
    # of the default ``to_numpy()`` conversion: nullable integers used to be
    # converted to float here, losing precision for large values (GH#63903).
    values = self.to_numpy(dtype=object, na_value=libmissing.NA)
    result = map_array(values, mapper, na_action=na_action)
    # NOTE(review): presumably this restores the original masked dtype when
    # the mapped values allow it -- confirm against _cast_pointwise_result.
    return self._cast_pointwise_result(result)

@overload
def any(
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,3 +667,14 @@ def test_series_apply_unpack_nested_data():
result = ser.apply(lambda x: Series(x))
expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]})
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("dtype", ["Int64", "UInt64"])
def test_apply_nullable_integer_precision(dtype):
    # GH#63903
    # 10**16 + 1 is larger than 2**53, so it cannot survive a round trip
    # through float64; apply must keep the nullable integer dtype.
    big = 10000000000000001

    def add_two_unless_missing(value):
        if pd.notna(value):
            return value + 2
        return value

    expected = Series([big + 2, pd.NA], dtype=dtype)
    result = Series([big, None], dtype=dtype).apply(add_two_unless_missing)
    tm.assert_series_equal(result, expected)
19 changes: 5 additions & 14 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,25 +173,16 @@ def skip_if_doesnt_support_2d(self, dtype, request):

@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map(self, data_missing, na_action, using_nan_is_na):
    # GH#63903
    # Mapping with the identity function must give back an extension array
    # equal to the input, rather than a NumPy array.
    result = data_missing.map(lambda x: x, na_action=na_action)
    tm.assert_extension_array_equal(result, data_missing)

def test_map_na_action_ignore(self, data_missing_for_sorting, using_nan_is_na):
    # GH#63903
    # Every non-missing element is mapped to the value at position 2, while
    # the missing entry stays NA; the result keeps the input dtype.
    zero = data_missing_for_sorting[2]
    result = data_missing_for_sorting.map(lambda x: zero, na_action="ignore")
    expected = pd.array([zero, pd.NA, zero], dtype=data_missing_for_sorting.dtype)
    tm.assert_extension_array_equal(result, expected)

def _get_expected_exception(self, op_name, obj, other):
try:
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/series/methods/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays.masked import BaseMaskedDtype

# The fixture is mostly used in pandas/tests/apply, so it's defined in that
# conftest, which is out of scope here. So we need to manually import
Expand Down Expand Up @@ -243,7 +244,11 @@ def test_map_empty(request, index):
s = Series(index)
result = s.map({})

expected = Series(np.nan, index=s.index)
# GH#63903
if isinstance(s.dtype, BaseMaskedDtype):
expected = Series(pd.NA, index=s.index, dtype=s.dtype)
else:
expected = Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -677,3 +682,14 @@ def test_map_pyarrow_timestamp(as_td):
# we don't for Series.map
expected_index = Index(expected).astype("int64[pyarrow]")
tm.assert_index_equal(res_index, expected_index)


@pytest.mark.parametrize("dtype", ["Int64", "UInt64"])
def test_map_nullable_integer_precision(dtype):
    # GH#63903
    # 10**16 + 1 is larger than 2**53, so it cannot survive a round trip
    # through float64; map must keep the nullable integer dtype.
    big = 10000000000000001

    def add_two_unless_missing(value):
        if pd.notna(value):
            return value + 2
        return value

    expected = Series([big + 2, pd.NA], dtype=dtype)
    result = Series([big, None], dtype=dtype).map(add_two_unless_missing)
    tm.assert_series_equal(result, expected)
Loading