-
-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Allow decompression to continue after exceeding max_length #11966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
5968707
2eeeb5e
8b0cfee
9cd9154
83fc87c
efe2ca9
57bf4a2
82a4dfb
c44fd1e
a85f38b
602eb51
32f0a84
94c70a9
bd34cea
0d4683c
d3df801
b81f901
4dd2279
3a604a4
10feb8b
d02bbf5
67bd57d
04b717f
53ac968
44c12a1
7986da3
75794c2
ab73626
ae46ee1
d738031
1d5fc0e
28dffda
2eb92be
3dbf9c8
3ea25ca
d99fc34
11cf432
de6fca2
3a80456
7da23c1
a4fc7c1
b3c77d7
f26edc0
1997c8b
b4443b7
b17c013
d628cee
cd2e07a
fd74c03
087de36
103c4a6
a249c04
f20ddc7
0fe7000
5130266
7db0120
d113ccb
185db0b
3be7ac6
80eb8c7
5183441
60c61e3
2a6eff8
dbfc0c8
eac561e
1a40781
520b64a
9c2987b
c4b058d
fa644c7
dd82b9f
3099a40
7775793
9525459
f4985a2
32d5c5f
afa2b55
30c23d4
6a5a2c7
45f66d1
80d955f
0cc6275
69e3bbd
397e905
aed6863
a0fb83b
360f6de
6e04d89
698e0cc
48d4119
6598ff6
3f76e2c
faf6e40
add2b70
3396079
69a59a8
c5f6e6a
51eda1b
156fb3c
8b05bfc
f1bfba5
9c1ffa9
79c9da6
9467ad2
5e8068f
e1de722
46713f5
67c210d
2bc5d17
46eeeda
618309f
2cc7567
1f256df
70a51f5
06c5ec9
69c1602
36c2bc3
78e7099
2c47750
e47504c
d92ed4c
ad2c9ab
bd7c69c
2106fbe
4f7f928
2677720
5cb6324
bd6b520
bd625ba
ef3b5d9
c2ba043
dcdc386
3525d3d
271d10b
6749571
28c6bcf
e874ebe
389b0e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -291,15 +291,19 @@ cdef class HttpParser: | |
| bint _response_with_body | ||
| bint _read_until_eof | ||
|
|
||
| bytes _tail | ||
| bint _started | ||
| object _url | ||
| bytearray _buf | ||
| str _path | ||
| str _reason | ||
| list _headers | ||
| bint _last_had_more_data | ||
| list _raw_headers | ||
| bint _upgraded | ||
| list _messages | ||
| bint _more_data_available | ||
| bint _paused | ||
| object _payload | ||
| bint _payload_error | ||
| object _payload_exception | ||
|
|
@@ -345,13 +349,17 @@ cdef class HttpParser: | |
| self._timer = timer | ||
|
|
||
| self._buf = bytearray() | ||
| self._last_had_more_data = False | ||
| self._more_data_available = False | ||
| self._paused = False | ||
| self._payload = None | ||
| self._payload_error = 0 | ||
| self._payload_exception = payload_exception | ||
| self._messages = [] | ||
|
|
||
| self._raw_name = EMPTY_BYTES | ||
| self._raw_value = EMPTY_BYTES | ||
| self._tail = b"" | ||
| self._has_value = False | ||
| self._header_name_size = 0 | ||
|
|
||
|
|
@@ -503,6 +511,10 @@ cdef class HttpParser: | |
|
|
||
| ### Public API ### | ||
|
|
||
| def pause_reading(self): | ||
| assert self._payload is not None | ||
| self._paused = True | ||
|
|
||
| def feed_eof(self): | ||
| cdef bytes desc | ||
|
|
||
|
|
@@ -529,6 +541,23 @@ cdef class HttpParser: | |
| size_t nb | ||
| cdef cparser.llhttp_errno_t errno | ||
|
|
||
| if self._tail: | ||
| data, self._tail = self._tail + data, EMPTY_BYTES | ||
|
|
||
| had_more_data = self._more_data_available | ||
| if self._more_data_available: | ||
| result = cb_on_body(self._cparser, EMPTY_BYTES, 0) | ||
| if result is cparser.HPE_PAUSED: | ||
| self._last_had_more_data = had_more_data | ||
| self._tail = data | ||
| return (), False, EMPTY_BYTES | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Newer version of Cython optimize literal |
||
| # TODO: Do we need to handle error case (-1)? | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is already covered, but could use a test. Marked with |
||
| # If the last pause had more data, then we probably paused at the | ||
| # end of the body. Therefore we need to continue with empty bytes. | ||
| if not data and not self._last_had_more_data: | ||
| return (), False, EMPTY_BYTES | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Newer version of Cython optimize literal |
||
| self._last_had_more_data = False | ||
|
|
||
| PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE) | ||
| data_len = <size_t>self.py_buf.len | ||
|
|
||
|
|
@@ -539,12 +568,15 @@ cdef class HttpParser: | |
|
|
||
| if errno is cparser.HPE_PAUSED_UPGRADE: | ||
| cparser.llhttp_resume_after_upgrade(self._cparser) | ||
|
|
||
| nb = cparser.llhttp_get_error_pos(self._cparser) - <char*>self.py_buf.buf | ||
| elif errno is cparser.HPE_PAUSED: | ||
| cparser.llhttp_resume(self._cparser) | ||
| pos = cparser.llhttp_get_error_pos(self._cparser) - <char*>self.py_buf.buf | ||
| self._tail = data[pos:] | ||
|
|
||
| PyBuffer_Release(&self.py_buf) | ||
|
|
||
| if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE): | ||
| if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED, cparser.HPE_PAUSED_UPGRADE): | ||
| if self._payload_error == 0: | ||
| if self._last_error is not None: | ||
| ex = self._last_error | ||
|
|
@@ -569,7 +601,7 @@ cdef class HttpParser: | |
| if self._upgraded: | ||
| return messages, True, data[nb:] | ||
| else: | ||
| return messages, False, b"" | ||
| return messages, False, EMPTY_BYTES | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Newer version of Cython optimize literal
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, I should make the change in the other direction and remove that global then? |
||
|
|
||
| def set_upgraded(self, val): | ||
| self._upgraded = val | ||
|
|
@@ -762,19 +794,26 @@ cdef int cb_on_body(cparser.llhttp_t* parser, | |
| const char *at, size_t length) except -1: | ||
| cdef HttpParser pyparser = <HttpParser>parser.data | ||
| cdef bytes body = at[:length] | ||
| try: | ||
| pyparser._payload.feed_data(body) | ||
| except BaseException as underlying_exc: | ||
| reraised_exc = underlying_exc | ||
| if pyparser._payload_exception is not None: | ||
| reraised_exc = pyparser._payload_exception(str(underlying_exc)) | ||
|
|
||
| set_exception(pyparser._payload, reraised_exc, underlying_exc) | ||
|
|
||
| pyparser._payload_error = 1 | ||
| return -1 | ||
| else: | ||
| return 0 | ||
| while body or pyparser._more_data_available: | ||
| try: | ||
| pyparser._more_data_available = pyparser._payload.feed_data(body) | ||
| except BaseException as underlying_exc: | ||
| reraised_exc = underlying_exc | ||
| if pyparser._payload_exception is not None: | ||
| reraised_exc = pyparser._payload_exception(str(underlying_exc)) | ||
|
|
||
| set_exception(pyparser._payload, reraised_exc, underlying_exc) | ||
|
|
||
| pyparser._payload_error = 1 | ||
| pyparser._paused = False | ||
| return -1 | ||
| body = EMPTY_BYTES | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Newer version of Cython optimize literal |
||
|
|
||
| if pyparser._paused: | ||
| pyparser._paused = False | ||
| return cparser.HPE_PAUSED | ||
| pyparser._paused = False | ||
| return 0 | ||
|
|
||
|
|
||
| cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,9 @@ | |
|
|
||
|
|
||
| MAX_SYNC_CHUNK_SIZE = 4096 | ||
| DEFAULT_MAX_DECOMPRESS_SIZE = 2**25 # 32MiB | ||
| # Matches the max size we receive from sockets: | ||
| # https://github.com/python/cpython/blob/1857a40807daeae3a1bf5efb682de9c9ae6df845/Lib/asyncio/selector_events.py#L766 | ||
| DEFAULT_MAX_DECOMPRESS_SIZE = 256 * 1024 | ||
Dreamsorcerer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # Unlimited decompression constants - different libraries use different conventions | ||
| ZLIB_MAX_LENGTH_UNLIMITED = 0 # zlib uses 0 to mean unlimited | ||
|
|
@@ -53,6 +55,9 @@ def flush(self, length: int = ..., /) -> bytes: ... | |
| @property | ||
| def eof(self) -> bool: ... | ||
|
|
||
| @property | ||
| def unconsumed_tail(self) -> bytes: ... | ||
|
|
||
|
|
||
| class ZLibBackendProtocol(Protocol): | ||
| MAX_WBITS: int | ||
|
|
@@ -179,6 +184,11 @@ async def decompress( | |
| ) | ||
| return self.decompress_sync(data, max_length) | ||
|
|
||
| @property | ||
| @abstractmethod | ||
| def data_available(self) -> bool: | ||
| """Return True if more output is available by passing b"".""" | ||
|
|
||
|
|
||
| class ZLibCompressor: | ||
| def __init__( | ||
|
|
@@ -271,7 +281,9 @@ def __init__( | |
| def decompress_sync( | ||
| self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED | ||
| ) -> bytes: | ||
| return self._decompressor.decompress(data, max_length) | ||
| return self._decompressor.decompress( | ||
| self._decompressor.unconsumed_tail + data, max_length | ||
| ) | ||
|
|
||
| def flush(self, length: int = 0) -> bytes: | ||
| return ( | ||
|
|
@@ -280,6 +292,10 @@ def flush(self, length: int = 0) -> bytes: | |
| else self._decompressor.flush() | ||
| ) | ||
|
|
||
| @property | ||
| def data_available(self) -> bool: | ||
| return bool(self._decompressor.unconsumed_tail) | ||
|
|
||
| @property | ||
| def eof(self) -> bool: | ||
| return self._decompressor.eof | ||
|
|
@@ -301,22 +317,31 @@ def __init__( | |
| "Please install `Brotli` module" | ||
| ) | ||
| self._obj = brotli.Decompressor() | ||
| self._last_empty = False | ||
| super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size) | ||
|
|
||
| def decompress_sync( | ||
| self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED | ||
| ) -> bytes: | ||
| """Decompress the given data.""" | ||
| if hasattr(self._obj, "decompress"): | ||
| return cast(bytes, self._obj.decompress(data, max_length)) | ||
| return cast(bytes, self._obj.process(data, max_length)) | ||
| result = cast(bytes, self._obj.decompress(data, max_length)) | ||
|
||
| else: | ||
| result = cast(bytes, self._obj.process(data, max_length)) | ||
| # Only way to know that brotli has no further data is checking we get no output | ||
| self._last_empty = result == b"" | ||
|
Comment on lines
+331
to
+332
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is what it is, but if we only need it for Brotli maybe move the self._last_empty to the brotli classes so its clear its only for brotli?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the BrotliDecompressor class. |
||
| return result | ||
|
|
||
| def flush(self) -> bytes: | ||
| """Flush the decompressor.""" | ||
| if hasattr(self._obj, "flush"): | ||
| return cast(bytes, self._obj.flush()) | ||
| return b"" | ||
|
|
||
| @property | ||
| def data_available(self) -> bool: | ||
| return not self._obj.is_finished() and not self._last_empty | ||
|
|
||
|
|
||
| class ZSTDDecompressor(DecompressionBaseHandler): | ||
| def __init__( | ||
|
|
@@ -346,3 +371,7 @@ def decompress_sync( | |
|
|
||
| def flush(self) -> bytes: | ||
| return b"" | ||
|
|
||
| @property | ||
| def data_available(self) -> bool: | ||
| return not self._obj.needs_input and not self._obj.eof | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,10 +73,6 @@ class ContentLengthError(PayloadEncodingError): | |
| """Not enough data to satisfy content length header.""" | ||
|
|
||
|
|
||
| class DecompressSizeError(PayloadEncodingError): | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to retain this in the backport, in case anybody is referencing it. |
||
| """Decompressed size exceeds the configured limit.""" | ||
|
|
||
|
|
||
| class LineTooLong(BadHttpMessage): | ||
| def __init__(self, line: bytes, limit: int) -> None: | ||
| super().__init__(f"Got more than {limit} bytes when reading: {line!r}.") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CFLAGS="-I$(pwd)/vendor/llhttp/build" cythonize -X language_level=3 -a -i aiohttp/_http_parser.pyxThan open
aiohttp/_http_parser.htmland look for yellow. Anything that it white Cython can optimize to remove the Python calls.