Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions pypdf/_xobj_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from typing import Any, Literal, Optional, Union, cast

from ._utils import check_if_whitespace_only, logger_warning
from .constants import ColorSpaces, StreamAttributes
from .constants import ColorSpaces
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .constants import StreamAttributes
from .errors import EmptyImageDataError, PdfReadError
from .generic import (
ArrayObject,
Expand Down Expand Up @@ -174,14 +175,16 @@ def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, An
base, hival = element1.split("\x00")
hival = int(hival)
return color_space, base, hival, lookup
raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")
raise PdfReadError(
f"Expected color space with 4 values, got {count}: {color_space}"
)


def _handle_flate(
size: tuple[int, int],
data: bytes,
mode: mode_str_type,
color_space: str,
color_space: ArrayObject,
colors: int,
obj_as_text: str,
) -> tuple[Image.Image, str, str, bool]:
Expand Down Expand Up @@ -233,13 +236,13 @@ def _handle_flate(
if actual_count < expected_count:
logger_warning(
f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
__name__
__name__,
)
lookup += bytes([0] * (expected_count - actual_count))
elif not check_if_whitespace_only(lookup[expected_count:]):
logger_warning(
f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
__name__
__name__,
)
lookup = lookup[:expected_count]
colors_arr = [lookup[:nb], lookup[nb:]]
Expand Down Expand Up @@ -280,7 +283,9 @@ def _handle_flate(
# Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary
mode2 = _get_image_mode(color_space, colors, mode)[0]
if mode != mode2:
img = Image.frombytes(mode2, size, data) # reloaded as mode may have changed
img = Image.frombytes(
mode2, size, data
) # reloaded as mode may have changed
if mode == "CMYK":
extension = ".tif"
image_format = "TIFF"
Expand All @@ -291,7 +296,7 @@ def _handle_jpx(
size: tuple[int, int],
data: bytes,
mode: mode_str_type,
color_space: str,
color_space: ArrayObject,
colors: int,
) -> tuple[Image.Image, str, str, bool]:
"""
Expand Down Expand Up @@ -336,12 +341,14 @@ def _apply_decode(
# requires reverting scale (cf p243,2§ last sentence)
decode = x_object_obj.get(
IA.DECODE,
([1.0, 0.0] * len(img.getbands()))
if (
(img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE))
or (invert_color and img.mode == "L")
)
else None,
(
([1.0, 0.0] * len(img.getbands()))
if (
(img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE))
or (invert_color and img.mode == "L")
)
else None
),
)
if (
isinstance(color_space, ArrayObject)
Expand Down Expand Up @@ -381,23 +388,21 @@ def _get_mode_and_invert_color(
else:
mode, invert_color = _get_image_mode(
color_space,
2
if (
colors == 1
and (
not is_null_or_none(color_space)
and "Gray" not in color_space
(
2
if (
colors == 1
and (not is_null_or_none(color_space) and "Gray" not in color_space)
)
)
else colors,
else colors
),
"",
)
return mode, invert_color


def _xobj_to_image(
x_object: dict[str, Any],
pillow_parameters: Union[dict[str, Any], None] = None
x_object: dict[str, Any], pillow_parameters: Union[dict[str, Any], None] = None
) -> tuple[Optional[str], bytes, Any]:
"""
Users need to have the pillow package installed.
Expand All @@ -414,6 +419,7 @@ def _xobj_to_image(
Tuple[file extension, bytes, PIL.Image.Image]

"""

def _apply_alpha(
img: Image.Image,
x_object: dict[str, Any],
Expand Down Expand Up @@ -462,7 +468,7 @@ def _apply_alpha(

# Get color properties
colors = x_object.get("/Colors", 1)
color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
color_space: ArrayObject = x_object.get("/ColorSpace", NullObject()).get_object()
if isinstance(color_space, list) and len(color_space) == 1:
color_space = color_space[0].get_object()

Expand Down
30 changes: 15 additions & 15 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,13 @@
from collections.abc import Iterable, Sequence
from io import BytesIO
from math import ceil
from typing import (
Any,
Callable,
Optional,
Union,
cast,
)
from typing import Any, Callable, Optional, Union, cast

from .._protocols import PdfReaderProtocol, PdfWriterProtocol, XmpInformationProtocol
from .._protocols import (
PdfReaderProtocol,
PdfWriterProtocol,
XmpInformationProtocol,
)
from .._utils import (
WHITESPACES,
StreamType,
Expand All @@ -57,9 +55,9 @@
from ..constants import (
CheckboxRadioButtonAttributes,
FieldDictionaryAttributes,
OutlineFontFlag,
)
from ..constants import FilterTypes as FT
from ..constants import OutlineFontFlag
from ..constants import StreamAttributes as SA
from ..constants import TypArguments as TA
from ..constants import TypFitArguments as TF
Expand Down Expand Up @@ -1000,7 +998,7 @@ def write_to_stream(

@staticmethod
def initialize_from_dictionary(
data: dict[str, Any]
data: dict[str, Any],
) -> Union["EncodedStreamObject", "DecodedStreamObject"]:
retval: Union[EncodedStreamObject, DecodedStreamObject]
if SA.FILTER in data:
Expand Down Expand Up @@ -1046,7 +1044,9 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
retval._data = FlateDecode.encode(self._data, level)
return retval

def decode_as_image(self, pillow_parameters: Union[dict[str, Any], None] = None) -> Any:
def decode_as_image(
self, pillow_parameters: Union[dict[str, Any], None] = None
) -> Any:
"""
Try to decode the stream object as an image

Expand Down Expand Up @@ -1166,7 +1166,7 @@ def __init__(
# seems to already be broken beforehand in these cases.
logger_warning(
f"Expected StreamObject, got {type(s_resolved).__name__} instead. Data might be wrong.",
__name__
__name__,
)
else:
data += s_resolved.get_data()
Expand Down Expand Up @@ -1598,9 +1598,9 @@ class Destination(TreeObject):

"""

node: Optional[
DictionaryObject
] = None # node provide access to the original Object
node: Optional[DictionaryObject] = (
None # node provide access to the original Object
)

def __init__(
self,
Expand Down
57 changes: 44 additions & 13 deletions tests/test_xobject_image_helpers.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
"""Test the pypdf._xobj_image_helpers module."""

from io import BytesIO
from pathlib import Path

import pytest
from PIL import Image

from pypdf import PdfReader
from pypdf._xobj_image_helpers import _extended_image_from_bytes, _handle_flate, _xobj_to_image
from pypdf._xobj_image_helpers import (
_extended_image_from_bytes,
_handle_flate,
_xobj_to_image,
)
from pypdf.constants import FilterTypes, ImageAttributes, StreamAttributes
from pypdf.errors import EmptyImageDataError, PdfReadError
from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject, StreamObject, TextStringObject
from pypdf.generic import (
ArrayObject,
DecodedStreamObject,
NameObject,
NumberObject,
StreamObject,
TextStringObject,
)

from . import get_data_from_url, get_image_data

Expand Down Expand Up @@ -106,7 +118,9 @@ def test_handle_flate__image_mode_1(caplog):
# here, but received a custom padding of `0`.
lookup.set_data(b"\x42\x42\x42\x00\x13")
caplog.clear()
expected_short_data = tuple([entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data])
expected_short_data = tuple(
[entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data]
)
result = _handle_flate(
size=(3, 3),
data=data,
Expand All @@ -131,27 +145,38 @@ def test_extended_image_frombytes_zero_data():
size = (1, 1)
data = b""

with pytest.raises(EmptyImageDataError, match=r"Data is 0 bytes, cannot process an image from empty data\."):
with pytest.raises(
EmptyImageDataError,
match=r"Data is 0 bytes, cannot process an image from empty data\.",
):
_extended_image_from_bytes(mode, size, data)


def test_handle_flate__autodesk_indexed():
reader = PdfReader(RESOURCE_ROOT / "AutoCad_Diagram.pdf")
page = reader.pages[0]
for name, image in page.images.items():
assert name.startswith("/")
image.image.load()
if isinstance(name, str):
assert name.startswith("/")
else:
assert name[0].startswith("/")

if image.image:
image.image.load()

data = RESOURCE_ROOT.joinpath("AutoCad_Diagram.pdf").read_bytes()
data = data.replace(b"/DeviceRGB\x00255", b"/DeviceRGB")
reader = PdfReader(BytesIO(data))
page = reader.pages[0]
with pytest.raises(
PdfReadError,
match=r"^Expected color space with 4 values, got 3: \['/Indexed', '/DeviceRGB', '\\x00\\x80\\x00\\x80\\x80耀" # noqa: E501
PdfReadError,
match=r"^Expected color space with 4 values, got 3: \['/Indexed', '/DeviceRGB', '\\x00\\x80\\x00\\x80\\x80耀", # noqa: E501
):
for name, _image in page.images.items(): # noqa: PERF102
assert name.startswith("/")
if isinstance(name, str):
assert name.startswith("/")
else:
assert name[0].startswith("/")


@pytest.mark.enable_socket
Expand All @@ -161,7 +186,8 @@ def test_get_mode_and_invert_color():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
page = reader.pages[12]
for _name, image in page.images.items(): # noqa: PERF102
image.image.load()
if image.image:
image.image.load()


@pytest.mark.enable_socket
Expand All @@ -171,8 +197,11 @@ def test_get_imagemode__empty_array():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
page = reader.pages[0]

with pytest.raises(expected_exception=PdfReadError, match=r"^ColorSpace field not found in .+"):
page.images[0].image.load()
with pytest.raises(
expected_exception=PdfReadError, match=r"^ColorSpace field not found in .+"
):
if page.images[0].image:
page.images[0].image.load()


def test_p_image_with_alpha_mask():
Expand All @@ -187,7 +216,9 @@ def test_p_image_with_alpha_mask():
for obj in [x_object, mask_object]:
obj[NameObject(ImageAttributes.WIDTH)] = NumberObject(image.width)
obj[NameObject(ImageAttributes.HEIGHT)] = NumberObject(image.height)
obj[NameObject(StreamAttributes.FILTER)] = NameObject(FilterTypes.CCITT_FAX_DECODE)
obj[NameObject(StreamAttributes.FILTER)] = NameObject(
FilterTypes.CCITT_FAX_DECODE
)

# Set the basic image data.
x_object.set_data(image_data.getvalue())
Expand Down
Loading