diff --git a/guide/src/migration.md b/guide/src/migration.md index 70463842334..83f18e5cfd7 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -70,6 +70,37 @@ This should not require migration, nor is there expected to be breakage caused b Nevertheless, this affects the order of initialization so seemed worth noting in this guide. +### Removed implementations of `From`, `From`, and `From` for `PyErr` + +Previously the implementations of `From`, `From`, `From`, `From`, and `From` failed to construct the correct Python exception class, as reported in . +The implementations for `string::FromUtf8Error` and `ffi::IntoStringError` were fixed in this release. + +For `str::Utf8Error`, the Rust error does not contain the source bytes required to construct the Python exception. +Instead, `PyUnicodeDecodeError::new_err_from_utf8` can be used to convert the error to a `PyErr`. + +Before: + +```rust,ignore +fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> { + Ok(str::from_utf8(bytes)?) +} +``` + +After: + +```rust +# use pyo3::prelude::*; +use pyo3::exceptions::PyUnicodeDecodeError; + +# #[expect(dead_code)] +fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> { + str::from_utf8(bytes).map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(bytes, e)) +} +``` + +For `string::FromUtf16Error` and `char::DecodeUtf16Error` the Rust error types do not contain any of the information required to construct a `UnicodeDecodeError`. +To raise a Python `UnicodeDecodeError` a new error should be manually constructed by calling `PyUnicodeDecodeError::new_err(...)`. + ## from 0.26.* to 0.27 ### `FromPyObject` reworked for flexibility and efficiency @@ -1132,7 +1163,7 @@ let obj: Py = bound.unbind(); > [!WARNING] > Dangling pointer trap 💣 -> +> > Because of the ownership changes, code which uses `.as_ptr()` to convert `&PyAny` and other GIL Refs to a `*mut pyo3_ffi::PyObject` should take care to avoid creating dangling pointers now that `Bound` carries ownership. > > For example, the following pattern with `Option<&PyAny>` can easily create a dangling pointer when migrating to the `Bound` smart pointer: diff --git a/newsfragments/5668.added.md b/newsfragments/5668.added.md new file mode 100644 index 00000000000..e25dcab2314 --- /dev/null +++ b/newsfragments/5668.added.md @@ -0,0 +1 @@ +Added new `PyUnicodeDecodeError::new_err_from_utf8` API to create a `PyErr` from a `str::Utf8Error` diff --git a/newsfragments/5668.fixed.md b/newsfragments/5668.fixed.md new file mode 100644 index 00000000000..1b89b4fb10b --- /dev/null +++ b/newsfragments/5668.fixed.md @@ -0,0 +1 @@ +Fixed the implementations of `From` and `From` for `PyErr`. diff --git a/newsfragments/5668.removed.md b/newsfragments/5668.removed.md new file mode 100644 index 00000000000..990aa7baf4c --- /dev/null +++ b/newsfragments/5668.removed.md @@ -0,0 +1 @@ +Removed the failing implementations of `From`, `From`, and `From` for `PyErr`. diff --git a/src/conversions/std/cstring.rs b/src/conversions/std/cstring.rs index e0b2a22f076..d59aa207705 100644 --- a/src/conversions/std/cstring.rs +++ b/src/conversions/std/cstring.rs @@ -1,3 +1,4 @@ +use crate::exceptions::PyUnicodeDecodeError; #[cfg(feature = "experimental-inspect")] use crate::inspect::TypeHint; #[cfg(feature = "experimental-inspect")] @@ -6,7 +7,6 @@ use crate::types::PyString; use crate::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, Python}; use std::borrow::Cow; use std::ffi::{CStr, CString}; -use std::str::Utf8Error; #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] use { crate::{exceptions::PyValueError, ffi}, @@ -16,21 +16,24 @@ use { impl<'py> IntoPyObject<'py> for &CStr { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: TypeHint = <&str>::OUTPUT_TYPE; #[inline] fn into_pyobject(self, py: Python<'py>) -> Result { - self.to_str()?.into_pyobject(py).map_err(|err| match err {}) + self.to_str() + .map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(self.to_bytes(), e))? + .into_pyobject(py) + .map_err(|err| match err {}) } } impl<'py> IntoPyObject<'py> for CString { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE; @@ -44,7 +47,7 @@ impl<'py> IntoPyObject<'py> for CString { impl<'py> IntoPyObject<'py> for &CString { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE; @@ -58,7 +61,7 @@ impl<'py> IntoPyObject<'py> for &CString { impl<'py> IntoPyObject<'py> for Cow<'_, CStr> { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE; @@ -71,7 +74,7 @@ impl<'py> IntoPyObject<'py> for Cow<'_, CStr> { impl<'py> IntoPyObject<'py> for &Cow<'_, CStr> { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: TypeHint = <&CStr>::OUTPUT_TYPE; diff --git a/src/conversions/std/osstr.rs b/src/conversions/std/osstr.rs index 1efd1f5161d..4d71f69c704 100644 --- a/src/conversions/std/osstr.rs +++ b/src/conversions/std/osstr.rs @@ -1,4 +1,6 @@ use crate::conversion::IntoPyObject; +#[cfg(target_os = "wasi")] +use crate::exceptions::PyUnicodeDecodeError; use crate::ffi_ptr_ext::FfiPtrExt; #[cfg(feature = "experimental-inspect")] use crate::inspect::TypeHint; @@ -103,7 +105,11 @@ impl FromPyObject<'_, '_> for OsString { // // For WASI: OS strings are UTF-8 by definition. #[cfg(target_os = "wasi")] - let os_str: &OsStr = OsStr::new(std::str::from_utf8(fs_encoded_bytes.as_bytes())?); + let os_str: &OsStr = OsStr::new( + std::str::from_utf8(fs_encoded_bytes.as_bytes()).map_err(|e| { + PyUnicodeDecodeError::new_err_from_utf8(fs_encoded_bytes.as_bytes(), e) + })?, + ); #[cfg(not(target_os = "wasi"))] let os_str: &OsStr = std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes()); diff --git a/src/err/impls.rs b/src/err/impls.rs index c49c7a56f82..a29eecd32a0 100644 --- a/src/err/impls.rs +++ b/src/err/impls.rs @@ -1,4 +1,4 @@ -use crate::{err::PyErrArguments, exceptions, PyErr, Python}; +use crate::{err::PyErrArguments, exceptions, types, PyErr, Python}; use crate::{IntoPyObject, Py, PyAny}; use std::io; @@ -118,28 +118,82 @@ macro_rules! impl_to_pyerr { }; } +pub(crate) struct Utf8ErrorWithBytes { + pub(crate) err: std::str::Utf8Error, + pub(crate) bytes: Vec, +} + +impl PyErrArguments for Utf8ErrorWithBytes { + fn arguments(self, py: Python<'_>) -> Py { + let Self { err, bytes } = self; + let start = err.valid_up_to(); + let end = err.error_len().map_or(bytes.len(), |l| start + l); + + let encoding = types::PyString::new(py, "utf-8").into_any(); + let bytes = types::PyBytes::new(py, &bytes).into_any(); + let start = types::PyInt::new(py, start).into_any(); + let end = types::PyInt::new(py, end).into_any(); + let reason = types::PyString::new(py, "invalid utf-8").into_any(); + + // FIXME(icxolu) remove unwrap + types::PyTuple::new(py, &[encoding, bytes, start, end, reason]) + .unwrap() + .into_any() + .unbind() + } +} + +impl std::convert::From for PyErr { + fn from(err: Utf8ErrorWithBytes) -> PyErr { + exceptions::PyUnicodeDecodeError::new_err(err) + } +} + +impl PyErrArguments for std::string::FromUtf8Error { + fn arguments(self, py: Python<'_>) -> Py { + Utf8ErrorWithBytes { + err: self.utf8_error(), + bytes: self.into_bytes(), + } + .arguments(py) + } +} + +impl std::convert::From for PyErr { + fn from(err: std::string::FromUtf8Error) -> PyErr { + exceptions::PyUnicodeDecodeError::new_err(err) + } +} + +impl PyErrArguments for std::ffi::IntoStringError { + fn arguments(self, py: Python<'_>) -> Py { + Utf8ErrorWithBytes { + err: self.utf8_error(), + bytes: self.into_cstring().into_bytes(), + } + .arguments(py) + } +} + +impl std::convert::From for PyErr { + fn from(err: std::ffi::IntoStringError) -> PyErr { + exceptions::PyUnicodeDecodeError::new_err(err) + } +} + impl_to_pyerr!(std::array::TryFromSliceError, exceptions::PyValueError); impl_to_pyerr!(std::num::ParseIntError, exceptions::PyValueError); impl_to_pyerr!(std::num::ParseFloatError, exceptions::PyValueError); impl_to_pyerr!(std::num::TryFromIntError, exceptions::PyValueError); impl_to_pyerr!(std::str::ParseBoolError, exceptions::PyValueError); -impl_to_pyerr!(std::ffi::IntoStringError, exceptions::PyUnicodeDecodeError); impl_to_pyerr!(std::ffi::NulError, exceptions::PyValueError); -impl_to_pyerr!(std::str::Utf8Error, exceptions::PyUnicodeDecodeError); -impl_to_pyerr!(std::string::FromUtf8Error, exceptions::PyUnicodeDecodeError); -impl_to_pyerr!( - std::string::FromUtf16Error, - exceptions::PyUnicodeDecodeError -); -impl_to_pyerr!( - std::char::DecodeUtf16Error, - exceptions::PyUnicodeDecodeError -); impl_to_pyerr!(std::net::AddrParseError, exceptions::PyValueError); #[cfg(test)] mod tests { - use crate::{PyErr, Python}; + use super::*; + + use crate::{types::PyAnyMethods, IntoPyObjectExt, PyErr, Python}; use std::io; #[test] @@ -179,4 +233,69 @@ mod tests { check_err(io::ErrorKind::IsADirectory, "IsADirectoryError"); check_err(io::ErrorKind::NotADirectory, "NotADirectoryError"); } + + #[test] + #[allow(invalid_from_utf8)] + fn utf8_errors() { + let bytes = b"abc\xffdef".to_vec(); + + let check_err = |py_err: PyErr| { + Python::attach(|py| { + let py_err = py_err.into_bound_py_any(py).unwrap(); + + assert!(py_err.is_instance_of::()); + assert_eq!( + py_err + .getattr("encoding") + .unwrap() + .extract::() + .unwrap(), + "utf-8" + ); + assert_eq!( + py_err + .getattr("object") + .unwrap() + .extract::>() + .unwrap(), + &*bytes + ); + assert_eq!( + py_err.getattr("start").unwrap().extract::().unwrap(), + 3 + ); + assert_eq!( + py_err.getattr("end").unwrap().extract::().unwrap(), + 4 + ); + assert_eq!( + py_err + .getattr("reason") + .unwrap() + .extract::() + .unwrap(), + "invalid utf-8" + ); + }); + }; + + let utf8_err_with_bytes = Utf8ErrorWithBytes { + err: std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"), + bytes: bytes.clone(), + } + .into(); + check_err(utf8_err_with_bytes); + + let from_utf8_err = String::from_utf8(bytes.clone()) + .expect_err("\\xff is invalid utf-8") + .into(); + check_err(from_utf8_err); + + let from_utf8_err = std::ffi::CString::new(bytes.clone()) + .unwrap() + .into_string() + .expect_err("\\xff is invalid utf-8") + .into(); + check_err(from_utf8_err); + } } diff --git a/src/err/mod.rs b/src/err/mod.rs index 9388951ec53..ab82aa971b5 100644 --- a/src/err/mod.rs +++ b/src/err/mod.rs @@ -29,6 +29,7 @@ mod impls; pub use cast_error::{CastError, CastIntoError}; #[allow(deprecated)] pub use downcast_error::{DowncastError, DowncastIntoError}; +pub(crate) use impls::Utf8ErrorWithBytes; /// Represents a Python exception. /// diff --git a/src/exceptions.rs b/src/exceptions.rs index 5d5ac6b86f9..a6aad40b9b4 100644 --- a/src/exceptions.rs +++ b/src/exceptions.rs @@ -754,8 +754,30 @@ impl PyUnicodeDecodeError { input: &[u8], err: std::str::Utf8Error, ) -> PyResult> { - let pos = err.valid_up_to(); - PyUnicodeDecodeError::new(py, c"utf-8", input, pos..(pos + 1), c"invalid utf-8") + let start = err.valid_up_to(); + let end = err.error_len().map_or(input.len(), |l| start + l); + PyUnicodeDecodeError::new(py, c"utf-8", input, start..end, c"invalid utf-8") + } + + /// Create a new [`crate::PyErr`] of this type from a Rust UTF-8 decoding error. + /// + /// # Example + /// + /// ``` + /// use pyo3::prelude::*; + /// use pyo3::exceptions::PyUnicodeDecodeError; + /// + /// let invalid_utf8 = b"fo\xd8o"; + /// # #[expect(invalid_from_utf8)] + /// let err = std::str::from_utf8(invalid_utf8).expect_err("should be invalid utf8"); + /// let py_err = PyUnicodeDecodeError::new_err_from_utf8(invalid_utf8, err); + /// ``` + pub fn new_err_from_utf8(bytes: &[u8], err: std::str::Utf8Error) -> crate::PyErr { + crate::err::Utf8ErrorWithBytes { + err, + bytes: bytes.to_vec(), + } + .into() } } @@ -903,7 +925,7 @@ mod tests { use super::*; use crate::types::any::PyAnyMethods; use crate::types::{IntoPyDict, PyDict}; - use crate::PyErr; + use crate::{IntoPyObjectExt as _, PyErr}; import_exception!(socket, gaierror); import_exception!(email.errors, MessageError); @@ -1200,4 +1222,56 @@ mod tests { test_exception!(PyBytesWarning); #[cfg(Py_3_10)] test_exception!(PyEncodingWarning); + + #[test] + #[allow(invalid_from_utf8)] + fn unicode_decode_error_from_utf8() { + let bytes = b"abc\xffdef".to_vec(); + + let check_err = |py_err: PyErr| { + Python::attach(|py| { + let py_err = py_err.into_bound_py_any(py).unwrap(); + + assert!(py_err.is_instance_of::()); + assert_eq!( + py_err + .getattr("encoding") + .unwrap() + .extract::() + .unwrap(), + "utf-8" + ); + assert_eq!( + py_err + .getattr("object") + .unwrap() + .extract::>() + .unwrap(), + &*bytes + ); + assert_eq!( + py_err.getattr("start").unwrap().extract::().unwrap(), + 3 + ); + assert_eq!( + py_err.getattr("end").unwrap().extract::().unwrap(), + 4 + ); + assert_eq!( + py_err + .getattr("reason") + .unwrap() + .extract::() + .unwrap(), + "invalid utf-8" + ); + }); + }; + + let utf8_err_with_bytes = PyUnicodeDecodeError::new_err_from_utf8( + &bytes, + std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"), + ); + check_err(utf8_err_with_bytes); + } } diff --git a/src/impl_/pymodule.rs b/src/impl_/pymodule.rs index b1bf6fb8878..4e671a6042e 100644 --- a/src/impl_/pymodule.rs +++ b/src/impl_/pymodule.rs @@ -24,6 +24,7 @@ use std::sync::atomic::{AtomicI64, Ordering}; #[cfg(not(any(PyPy, GraalPy)))] use crate::exceptions::PyImportError; +use crate::exceptions::PyUnicodeDecodeError; use crate::prelude::PyTypeMethods; use crate::{ ffi, @@ -152,7 +153,11 @@ impl ModuleDef { let ffi_def = self.ffi_def.get(); - let name = unsafe { CStr::from_ptr((*ffi_def).m_name).to_str()? }.to_string(); + let m_name = unsafe { CStr::from_ptr((*ffi_def).m_name) }; + let name = m_name + .to_str() + .map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(m_name.to_bytes(), e))? + .to_string(); let kwargs = PyDict::new(py); kwargs.set_item("name", name)?; let spec = simple_ns.call((), Some(&kwargs))?; diff --git a/tests/ui/invalid_result_conversion.stderr b/tests/ui/invalid_result_conversion.stderr index ef31f073689..4adc47dee38 100644 --- a/tests/ui/invalid_result_conversion.stderr +++ b/tests/ui/invalid_result_conversion.stderr @@ -8,10 +8,10 @@ error[E0277]: the trait bound `PyErr: From` is not satisfied `PyErr` implements `From` `PyErr` implements `From>` `PyErr` implements `From>` - `PyErr` implements `From` `PyErr` implements `From>` `PyErr` implements `From>` - `PyErr` implements `From` `PyErr` implements `From` + `PyErr` implements `From` + `PyErr` implements `From>` and $N others = note: required for `MyError` to implement `Into`