Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion python/pyhive/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def __init__(
check_hostname=None,
ssl_cert=None,
thrift_transport=None,
ssl_context=None
ssl_context=None,
connection_timeout=None,
):
"""Connect to HiveServer2

Expand All @@ -175,6 +176,7 @@ def __init__(
Incompatible with host, port, auth, kerberos_service_name, and password.
:param ssl_context: A custom SSL context to use for HTTPS connections. If provided,
this overrides check_hostname and ssl_cert parameters.
:param connection_timeout: Millisecond timeout for Thrift connections. Skipped if using thrift_transport.
The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
/impala/_thrift_api.py#L152-L160
Expand All @@ -193,6 +195,8 @@ def __init__(
),
ssl_context=ssl_context,
)
if connection_timeout is not None:
thrift_transport.setTimeout(connection_timeout)

if auth in ("BASIC", "NOSASL", "NONE", None):
# Always needs the Authorization header
Expand Down Expand Up @@ -236,6 +240,8 @@ def __init__(
if auth is None:
auth = 'NONE'
socket = thrift.transport.TSocket.TSocket(host, port)
if connection_timeout is not None:
socket.setTimeout(connection_timeout)
Copy link

@fbertsch fbertsch Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, this sets both the connection timeout and the socket timeout. There's a nice description of the difference here.

Suggested change
socket.setTimeout(connection_timeout)
socket.setConnectTimeout(connection_timeout)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good mention. My goal with this was originally to manage the socket timeout. However, I think I will leave it this way to avoid getting too granular.

if auth == 'NOSASL':
# NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
self._transport = thrift.transport.TTransport.TBufferedTransport(socket)
Expand Down
11 changes: 11 additions & 0 deletions python/pyhive/tests/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,17 @@ def test_basic_ssl_context(self):
cursor.execute('SELECT 1 FROM one_row')
self.assertEqual(cursor.fetchall(), [(1,)])

def test_connection_timeout(self):
    """Open a connection with a 10000 ms connection_timeout and run a query."""
    timeout_ms = 10 * 1000
    connection = hive.connect(
        host=_HOST,
        port=10000,
        connection_timeout=timeout_ms,
    )
    with contextlib.closing(connection):
        cursor = connection.cursor()
        with contextlib.closing(cursor):
            # Same trivial query the neighbouring tests use.
            cursor.execute('SELECT 1 FROM one_row')
            rows = cursor.fetchall()
            self.assertEqual(rows, [(1,)])

def _restart_hs2():
subprocess.check_call(['sudo', 'service', 'hive-server2', 'restart'])
Expand Down