Skip to content

Commit febd0ca

Browse files
Fix bug causing TypeError when converting Rust encoding errors to Python
1 parent 5fcc5d7 commit febd0ca

File tree

8 files changed

+147
-26
lines changed

8 files changed

+147
-26
lines changed

guide/src/migration.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,37 @@ This should not require migration, nor is there expected to be breakage caused b
7070

7171
Nevertheless, this affects the order of initialization so seemed worth noting in this guide.
7272

73+
### Removed implementations of `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` for `PyErr`
74+
75+
Previously the implementations of `From<string::FromUtf8Error>`, `From<ffi::IntoStringError>`, `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` failed to construct the correct Python exception class, as reported in <https://github.com/PyO3/pyo3/issues/5651>.
76+
The implementations for `string::FromUtf8Error` and `ffi::IntoStringError` were fixed in this release.
77+
78+
For `str::Utf8Error`, the Rust error does not contain the source bytes required to construct the Python exception.
79+
Instead, `PyUnicodeDecodeError::new_err_from_utf8` can be used to convert the error to a `PyErr`.
80+
81+
Before:
82+
83+
```rust,ignore
84+
fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> {
85+
Ok(str::from_utf8(bytes)?)
86+
}
87+
```
88+
89+
After:
90+
91+
```rust
92+
# use pyo3::prelude::*;
93+
use pyo3::exceptions::PyUnicodeDecodeError;
94+
95+
# #[expect(dead_code)]
96+
fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> {
97+
str::from_utf8(bytes).map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(bytes, e))
98+
}
99+
```
100+
101+
For `string::FromUtf16Error` and `char::DecodeUtf16Error` the Rust error types do not contain any of the information required to construct a `UnicodeDecodeError`.
102+
To raise a Python `UnicodeDecodeError` a new error should be manually constructed by calling `PyUnicodeDecodeError::new_err(...)`.
103+
73104
## from 0.26.* to 0.27
74105

75106
### `FromPyObject` reworked for flexibility and efficiency
@@ -1132,7 +1163,7 @@ let obj: Py<PyList> = bound.unbind();
11321163

11331164
> [!WARNING]
11341165
> Dangling pointer trap 💣
1135-
>
1166+
>
11361167
> Because of the ownership changes, code which uses `.as_ptr()` to convert `&PyAny` and other GIL Refs to a `*mut pyo3_ffi::PyObject` should take care to avoid creating dangling pointers now that `Bound<PyAny>` carries ownership.
11371168
>
11381169
> For example, the following pattern with `Option<&PyAny>` can easily create a dangling pointer when migrating to the `Bound<PyAny>` smart pointer:

newsfragments/5668.fixed.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed the implementations of `From<string::FromUtf8Error>` and `From<ffi::IntoStringError>` for `PyErr`.
2+
Removed the failing implementations of `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` for `PyErr`.

src/conversions/std/cstring.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::exceptions::PyUnicodeDecodeError;
12
#[cfg(feature = "experimental-inspect")]
23
use crate::inspect::TypeHint;
34
#[cfg(feature = "experimental-inspect")]
@@ -6,7 +7,6 @@ use crate::types::PyString;
67
use crate::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, Python};
78
use std::borrow::Cow;
89
use std::ffi::{CStr, CString};
9-
use std::str::Utf8Error;
1010
#[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
1111
use {
1212
crate::{exceptions::PyValueError, ffi},
@@ -16,21 +16,24 @@ use {
1616
impl<'py> IntoPyObject<'py> for &CStr {
1717
type Target = PyString;
1818
type Output = Bound<'py, Self::Target>;
19-
type Error = Utf8Error;
19+
type Error = PyErr;
2020

2121
#[cfg(feature = "experimental-inspect")]
2222
const OUTPUT_TYPE: TypeHint = <&str>::OUTPUT_TYPE;
2323

2424
#[inline]
2525
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
26-
self.to_str()?.into_pyobject(py).map_err(|err| match err {})
26+
self.to_str()
27+
.map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(self.to_bytes(), e))?
28+
.into_pyobject(py)
29+
.map_err(|err| match err {})
2730
}
2831
}
2932

3033
impl<'py> IntoPyObject<'py> for CString {
3134
type Target = PyString;
3235
type Output = Bound<'py, Self::Target>;
33-
type Error = Utf8Error;
36+
type Error = PyErr;
3437

3538
#[cfg(feature = "experimental-inspect")]
3639
const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE;
@@ -44,7 +47,7 @@ impl<'py> IntoPyObject<'py> for CString {
4447
impl<'py> IntoPyObject<'py> for &CString {
4548
type Target = PyString;
4649
type Output = Bound<'py, Self::Target>;
47-
type Error = Utf8Error;
50+
type Error = PyErr;
4851

4952
#[cfg(feature = "experimental-inspect")]
5053
const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE;
@@ -58,7 +61,7 @@ impl<'py> IntoPyObject<'py> for &CString {
5861
impl<'py> IntoPyObject<'py> for Cow<'_, CStr> {
5962
type Target = PyString;
6063
type Output = Bound<'py, Self::Target>;
61-
type Error = Utf8Error;
64+
type Error = PyErr;
6265

6366
#[cfg(feature = "experimental-inspect")]
6467
const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE;
@@ -71,7 +74,7 @@ impl<'py> IntoPyObject<'py> for Cow<'_, CStr> {
7174
impl<'py> IntoPyObject<'py> for &Cow<'_, CStr> {
7275
type Target = PyString;
7376
type Output = Bound<'py, Self::Target>;
74-
type Error = Utf8Error;
77+
type Error = PyErr;
7578

7679
#[cfg(feature = "experimental-inspect")]
7780
const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE;

src/conversions/std/osstr.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use crate::conversion::IntoPyObject;
2+
#[cfg(target_os = "wasi")]
3+
use crate::exceptions::PyUnicodeDecodeError;
24
use crate::ffi_ptr_ext::FfiPtrExt;
35
#[cfg(feature = "experimental-inspect")]
46
use crate::inspect::TypeHint;
@@ -103,7 +105,11 @@ impl FromPyObject<'_, '_> for OsString {
103105
//
104106
// For WASI: OS strings are UTF-8 by definition.
105107
#[cfg(target_os = "wasi")]
106-
let os_str: &OsStr = OsStr::new(std::str::from_utf8(fs_encoded_bytes.as_bytes())?);
108+
let os_str: &OsStr = OsStr::new(
109+
std::str::from_utf8(fs_encoded_bytes.as_bytes()).map_err(|e| {
110+
PyUnicodeDecodeError::new_err_from_utf8(fs_encoded_bytes.as_bytes(), e)
111+
})?,
112+
);
107113
#[cfg(not(target_os = "wasi"))]
108114
let os_str: &OsStr =
109115
std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes());

src/err/impls.rs

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::{err::PyErrArguments, exceptions, PyErr, Python};
1+
use crate::{err::PyErrArguments, exceptions, types, PyErr, Python};
22
use crate::{IntoPyObject, Py, PyAny};
33
use std::io;
44

@@ -118,23 +118,70 @@ macro_rules! impl_to_pyerr {
118118
};
119119
}
120120

121+
impl PyErrArguments for exceptions::Utf8ErrorWithBytes {
122+
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
123+
let Self { err, bytes } = self;
124+
let start = err.valid_up_to();
125+
let end = err.error_len().map_or(bytes.len(), |l| start + l);
126+
127+
let encoding = types::PyString::new(py, "utf-8").into_any();
128+
let bytes = types::PyBytes::new(py, &bytes).into_any();
129+
let start = types::PyInt::new(py, start).into_any();
130+
let end = types::PyInt::new(py, end).into_any();
131+
let reason = types::PyString::new(py, "invalid utf-8").into_any();
132+
133+
// FIXME(icxolu) remove unwrap
134+
types::PyTuple::new(py, &[encoding, bytes, start, end, reason])
135+
.unwrap()
136+
.into_any()
137+
.unbind()
138+
}
139+
}
140+
141+
impl std::convert::From<exceptions::Utf8ErrorWithBytes> for PyErr {
142+
fn from(err: exceptions::Utf8ErrorWithBytes) -> PyErr {
143+
exceptions::PyUnicodeDecodeError::new_err(err)
144+
}
145+
}
146+
147+
impl PyErrArguments for std::string::FromUtf8Error {
148+
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
149+
exceptions::Utf8ErrorWithBytes {
150+
err: self.utf8_error(),
151+
bytes: self.into_bytes(),
152+
}
153+
.arguments(py)
154+
}
155+
}
156+
157+
impl std::convert::From<std::string::FromUtf8Error> for PyErr {
158+
fn from(err: std::string::FromUtf8Error) -> PyErr {
159+
exceptions::PyUnicodeDecodeError::new_err(err)
160+
}
161+
}
162+
163+
impl PyErrArguments for std::ffi::IntoStringError {
164+
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
165+
exceptions::Utf8ErrorWithBytes {
166+
err: self.utf8_error(),
167+
bytes: self.into_cstring().into_bytes(),
168+
}
169+
.arguments(py)
170+
}
171+
}
172+
173+
impl std::convert::From<std::ffi::IntoStringError> for PyErr {
174+
fn from(err: std::ffi::IntoStringError) -> PyErr {
175+
exceptions::PyUnicodeDecodeError::new_err(err)
176+
}
177+
}
178+
121179
impl_to_pyerr!(std::array::TryFromSliceError, exceptions::PyValueError);
122180
impl_to_pyerr!(std::num::ParseIntError, exceptions::PyValueError);
123181
impl_to_pyerr!(std::num::ParseFloatError, exceptions::PyValueError);
124182
impl_to_pyerr!(std::num::TryFromIntError, exceptions::PyValueError);
125183
impl_to_pyerr!(std::str::ParseBoolError, exceptions::PyValueError);
126-
impl_to_pyerr!(std::ffi::IntoStringError, exceptions::PyUnicodeDecodeError);
127184
impl_to_pyerr!(std::ffi::NulError, exceptions::PyValueError);
128-
impl_to_pyerr!(std::str::Utf8Error, exceptions::PyUnicodeDecodeError);
129-
impl_to_pyerr!(std::string::FromUtf8Error, exceptions::PyUnicodeDecodeError);
130-
impl_to_pyerr!(
131-
std::string::FromUtf16Error,
132-
exceptions::PyUnicodeDecodeError
133-
);
134-
impl_to_pyerr!(
135-
std::char::DecodeUtf16Error,
136-
exceptions::PyUnicodeDecodeError
137-
);
138185
impl_to_pyerr!(std::net::AddrParseError, exceptions::PyValueError);
139186

140187
#[cfg(test)]

src/exceptions.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,11 @@ impl_windows_native_exception!(
703703
native_doc!("WindowsError")
704704
);
705705

706+
pub(crate) struct Utf8ErrorWithBytes {
707+
pub(crate) err: std::str::Utf8Error,
708+
pub(crate) bytes: Vec<u8>,
709+
}
710+
706711
impl PyUnicodeDecodeError {
707712
/// Creates a Python `UnicodeDecodeError`.
708713
pub fn new<'py>(
@@ -754,8 +759,30 @@ impl PyUnicodeDecodeError {
754759
input: &[u8],
755760
err: std::str::Utf8Error,
756761
) -> PyResult<Bound<'py, PyUnicodeDecodeError>> {
757-
let pos = err.valid_up_to();
758-
PyUnicodeDecodeError::new(py, c"utf-8", input, pos..(pos + 1), c"invalid utf-8")
762+
let start = err.valid_up_to();
763+
let end = err.error_len().map_or(input.len(), |l| start + l);
764+
PyUnicodeDecodeError::new(py, c"utf-8", input, start..end, c"invalid utf-8")
765+
}
766+
767+
/// Create a new [`crate::PyErr`] of this type from a Rust UTF-8 decoding error.
768+
///
769+
/// # Example
770+
///
771+
/// ```
772+
/// use pyo3::prelude::*;
773+
/// use pyo3::exceptions::PyUnicodeDecodeError;
774+
///
775+
/// let invalid_utf8 = b"fo\xd8o";
776+
/// # #[expect(invalid_from_utf8)]
777+
/// let err = std::str::from_utf8(invalid_utf8).expect_err("should be invalid utf8");
778+
/// let py_err = PyUnicodeDecodeError::new_err_from_utf8(invalid_utf8, err);
779+
/// ```
780+
pub fn new_err_from_utf8(bytes: &[u8], err: std::str::Utf8Error) -> crate::PyErr {
781+
Utf8ErrorWithBytes {
782+
err,
783+
bytes: bytes.to_vec(),
784+
}
785+
.into()
759786
}
760787
}
761788

src/impl_/pymodule.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::sync::atomic::{AtomicI64, Ordering};
2424

2525
#[cfg(not(any(PyPy, GraalPy)))]
2626
use crate::exceptions::PyImportError;
27+
use crate::exceptions::PyUnicodeDecodeError;
2728
use crate::prelude::PyTypeMethods;
2829
use crate::{
2930
ffi,
@@ -152,7 +153,11 @@ impl ModuleDef {
152153

153154
let ffi_def = self.ffi_def.get();
154155

155-
let name = unsafe { CStr::from_ptr((*ffi_def).m_name).to_str()? }.to_string();
156+
let m_name = unsafe { CStr::from_ptr((*ffi_def).m_name) };
157+
let name = m_name
158+
.to_str()
159+
.map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(m_name.to_bytes(), e))?
160+
.to_string();
156161
let kwargs = PyDict::new(py);
157162
kwargs.set_item("name", name)?;
158163
let spec = simple_ns.call((), Some(&kwargs))?;

tests/ui/invalid_result_conversion.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ error[E0277]: the trait bound `PyErr: From<MyError>` is not satisfied
88
`PyErr` implements `From<AddrParseError>`
99
`PyErr` implements `From<CastError<'_, '_>>`
1010
`PyErr` implements `From<CastIntoError<'_>>`
11-
`PyErr` implements `From<DecodeUtf16Error>`
1211
`PyErr` implements `From<DowncastError<'_, '_>>`
1312
`PyErr` implements `From<DowncastIntoError<'_>>`
14-
`PyErr` implements `From<FromUtf16Error>`
1513
`PyErr` implements `From<FromUtf8Error>`
14+
`PyErr` implements `From<Infallible>`
15+
`PyErr` implements `From<IntoInnerError<W>>`
1616
and $N others
1717
= note: required for `MyError` to implement `Into<PyErr>`

0 commit comments

Comments
 (0)