Skip to content

Commit be2a58a

Browse files
committed
Switch from flake8 to ruff
1 parent ebf0b8d commit be2a58a

File tree

354 files changed

+2281
-2024
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

354 files changed

+2281
-2024
lines changed

.github/workflows/codeql-analysis.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,13 @@ jobs:
5656
- name: Install dependencies
5757
run: |
5858
uv sync --extra files --group test-spark-3.5 --group test-pydantic-2 --group dev
59-
uv pip install -U flake8-commas
6059
6160
# Set the `CODEQL-PYTHON` environment variable to the Python executable
6261
# that includes the dependencies
6362
echo "CODEQL_PYTHON=$(which python)" >> $GITHUB_ENV
6463
65-
- name: Run flake8
66-
run: python3 -m flake8 --config setup.cfg .
67-
6864
- name: Run mypy
69-
run: python3 -m mypy --config-file setup.cfg onetl
65+
run: python3 -m mypy onetl
7066

7167
codeql:
7268
name: CodeQL

.pre-commit-config.yaml

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -116,24 +116,19 @@ repos:
116116
hooks:
117117
- id: ruff-format
118118
priority: 8
119+
- id: ruff
120+
args: [--fix]
121+
priority: 9
119122

120123
- repo: local
121124
hooks:
122-
- id: flake8
123-
name: flake8
124-
entry: python3 -m flake8
125-
language: system
126-
types: [python]
127-
files: ^(onetl|tests)/.*$
128-
pass_filenames: true
129-
priority: 9
130125
- id: mypy
131126
name: mypy
132-
entry: python3 -m mypy --config-file setup.cfg onetl
127+
entry: python3 -m mypy onetl
133128
language: system
134129
types: [python]
135130
pass_filenames: false
136-
priority: 9
131+
priority: 10
137132
- id: towncrier
138133
name: towncrier
139134
entry: towncrier build --draft
@@ -153,6 +148,5 @@ ci:
153148
skip:
154149
- chmod # failing in pre-commit.ci
155150
- docker-compose-check # cannot run on pre-commit.ci
156-
- flake8 # checked with Github Actions
157151
- mypy # checked with Github Actions
158152
- towncrier # checked with Github Actions

onetl/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from onetl.plugins import import_plugins
66
from onetl.version import __version__
77

8+
__all__ = ["__version__"]
9+
810

911
def plugins_auto_import():
1012
"""

onetl/_metrics/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
__all__ = [
1111
"SparkCommandMetrics",
1212
"SparkDriverMetrics",
13-
"SparkMetricsRecorder",
1413
"SparkExecutorMetrics",
1514
"SparkInputMetrics",
15+
"SparkMetricsRecorder",
1616
"SparkOutputMetrics",
1717
]

onetl/_metrics/extract.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
def _get_int(data: dict[SparkSQLMetricNames, list[str]], key: Any) -> int | None:
3030
try:
3131
return int(data[key][0])
32-
except Exception:
32+
except (IndexError, KeyError, ValueError, TypeError):
3333
return None
3434

3535

@@ -38,7 +38,7 @@ def _get_bytes(data: dict[SparkSQLMetricNames, list[str]], key: Any) -> int | No
3838
raw_value = data[key][0]
3939
normalized_value = NON_BYTE_SIZE.sub("", raw_value)
4040
return int(ByteSize.validate(normalized_value))
41-
except Exception:
41+
except (IndexError, KeyError, ValueError, TypeError):
4242
return None
4343

4444

onetl/_metrics/listener/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@
1515
)
1616

1717
__all__ = [
18-
"SparkListenerTask",
19-
"SparkListenerTaskStatus",
20-
"SparkListenerTaskMetrics",
21-
"SparkListenerStage",
22-
"SparkListenerStageStatus",
23-
"SparkListenerJob",
24-
"SparkListenerJobStatus",
2518
"SparkListenerExecution",
2619
"SparkListenerExecutionStatus",
27-
"SparkSQLMetricNames",
20+
"SparkListenerJob",
21+
"SparkListenerJobStatus",
22+
"SparkListenerStage",
23+
"SparkListenerStageStatus",
24+
"SparkListenerTask",
25+
"SparkListenerTaskMetrics",
26+
"SparkListenerTaskStatus",
2827
"SparkMetricsListener",
28+
"SparkSQLMetricNames",
2929
]

onetl/_metrics/listener/base.py

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,28 @@ class BaseSparkListener:
1717
"""Base no-op SparkListener implementation.
1818
1919
See `SparkListener <https://spark.apache.org/docs/3.5.7/api/java/org/apache/spark/scheduler/SparkListener.html>`_ interface.
20-
"""
20+
""" # noqa: E501
2121

2222
spark: SparkSession
2323

2424
def activate(self):
2525
start_callback_server(self.spark)
2626

27-
# passing python listener object directly to addSparkListener or removeSparkListener leads to creating new java object each time.
27+
# passing python listener object directly to addSparkListener or removeSparkListener
28+
# leads to creating new java object each time.
2829
# But removeSparkListener call has effect only on the same Java object passed to removeSparkListener.
2930
# So we need to explicitly create Java object, and then pass it both calls.
3031
gateway = get_java_gateway(self.spark)
3132
java_list = gateway.jvm.java.util.ArrayList()
3233
java_list.append(self)
3334
self._java_listener = java_list[0]
3435

35-
spark_context = self.spark.sparkContext._jsc.sc() # noqa: WPS437
36+
spark_context = self.spark.sparkContext._jsc.sc() # noqa: SLF001
3637
spark_context.addSparkListener(self._java_listener)
3738

3839
def deactivate(self):
3940
with suppress(Exception):
40-
spark_context = self.spark.sparkContext._jsc.sc() # noqa: WPS437
41+
spark_context = self.spark.sparkContext._jsc.sc() # noqa: SLF001
4142
spark_context.removeSparkListener(self._java_listener)
4243

4344
with suppress(Exception):
@@ -50,7 +51,7 @@ def __enter__(self):
5051
def __exit__(self, exc_type, exc_val, exc_tb):
5152
self.deactivate()
5253

53-
def __del__(self): # noqa: WPS603
54+
def __del__(self):
5455
# If current object is collected by GC, deactivate listener
5556
# and free bind Java object
5657
self.deactivate()
@@ -60,119 +61,119 @@ def equals(self, other):
6061
# so we compare string representation which should contain some form of id
6162
return other.toString() == self._java_listener.toString()
6263

63-
def toString(self):
64+
def toString(self): # noqa: N802
6465
return type(self).__qualname__ + "@" + hex(id(self))
6566

66-
def hashCode(self):
67+
def hashCode(self): # noqa: N802
6768
return hash(self)
6869

6970
# no cover: start
7071
# method names are important for Java interface compatibility!
71-
def onApplicationEnd(self, application):
72+
def onApplicationEnd(self, application): # noqa: N802
7273
pass
7374

74-
def onApplicationStart(self, application):
75+
def onApplicationStart(self, application): # noqa: N802
7576
pass
7677

77-
def onBlockManagerAdded(self, block_manager):
78+
def onBlockManagerAdded(self, block_manager): # noqa: N802
7879
pass
7980

80-
def onBlockManagerRemoved(self, block_manager):
81+
def onBlockManagerRemoved(self, block_manager): # noqa: N802
8182
pass
8283

83-
def onBlockUpdated(self, block):
84+
def onBlockUpdated(self, block): # noqa: N802
8485
pass
8586

86-
def onEnvironmentUpdate(self, environment):
87+
def onEnvironmentUpdate(self, environment): # noqa: N802
8788
pass
8889

89-
def onExecutorAdded(self, executor):
90+
def onExecutorAdded(self, executor): # noqa: N802
9091
pass
9192

92-
def onExecutorMetricsUpdate(self, executor):
93+
def onExecutorMetricsUpdate(self, executor): # noqa: N802
9394
pass
9495

95-
def onExecutorRemoved(self, executor):
96+
def onExecutorRemoved(self, executor): # noqa: N802
9697
pass
9798

98-
def onExecutorBlacklisted(self, event):
99+
def onExecutorBlacklisted(self, event): # noqa: N802
99100
pass
100101

101-
def onExecutorBlacklistedForStage(self, event):
102+
def onExecutorBlacklistedForStage(self, event): # noqa: N802
102103
pass
103104

104-
def onExecutorExcluded(self, event):
105+
def onExecutorExcluded(self, event): # noqa: N802
105106
pass
106107

107-
def onExecutorExcludedForStage(self, event):
108+
def onExecutorExcludedForStage(self, event): # noqa: N802
108109
pass
109110

110-
def onExecutorUnblacklisted(self, event):
111+
def onExecutorUnblacklisted(self, event): # noqa: N802
111112
pass
112113

113-
def onExecutorUnexcluded(self, event):
114+
def onExecutorUnexcluded(self, event): # noqa: N802
114115
pass
115116

116-
def onJobStart(self, event):
117+
def onJobStart(self, event): # noqa: N802
117118
pass
118119

119-
def onJobEnd(self, event):
120+
def onJobEnd(self, event): # noqa: N802
120121
pass
121122

122-
def onNodeBlacklisted(self, node):
123+
def onNodeBlacklisted(self, node): # noqa: N802
123124
pass
124125

125-
def onNodeBlacklistedForStage(self, stage):
126+
def onNodeBlacklistedForStage(self, stage): # noqa: N802
126127
pass
127128

128-
def onNodeExcluded(self, node):
129+
def onNodeExcluded(self, node): # noqa: N802
129130
pass
130131

131-
def onNodeExcludedForStage(self, node):
132+
def onNodeExcludedForStage(self, node): # noqa: N802
132133
pass
133134

134-
def onNodeUnblacklisted(self, node):
135+
def onNodeUnblacklisted(self, node): # noqa: N802
135136
pass
136137

137-
def onNodeUnexcluded(self, node):
138+
def onNodeUnexcluded(self, node): # noqa: N802
138139
pass
139140

140-
def onOtherEvent(self, event):
141+
def onOtherEvent(self, event): # noqa: N802
141142
pass
142143

143-
def onResourceProfileAdded(self, resource_profile):
144+
def onResourceProfileAdded(self, resource_profile): # noqa: N802
144145
pass
145146

146-
def onSpeculativeTaskSubmitted(self, task):
147+
def onSpeculativeTaskSubmitted(self, task): # noqa: N802
147148
pass
148149

149-
def onStageCompleted(self, event):
150+
def onStageCompleted(self, event): # noqa: N802
150151
pass
151152

152-
def onStageExecutorMetrics(self, metrics):
153+
def onStageExecutorMetrics(self, metrics): # noqa: N802
153154
pass
154155

155-
def onStageSubmitted(self, event):
156+
def onStageSubmitted(self, event): # noqa: N802
156157
pass
157158

158-
def onTaskEnd(self, event):
159+
def onTaskEnd(self, event): # noqa: N802
159160
pass
160161

161-
def onTaskGettingResult(self, task):
162+
def onTaskGettingResult(self, task): # noqa: N802
162163
pass
163164

164-
def onTaskStart(self, event):
165+
def onTaskStart(self, event): # noqa: N802
165166
pass
166167

167-
def onUnpersistRDD(self, rdd):
168+
def onUnpersistRDD(self, rdd): # noqa: N802
168169
pass
169170

170-
def onUnschedulableTaskSetAdded(self, task_set):
171+
def onUnschedulableTaskSetAdded(self, task_set): # noqa: N802
171172
pass
172173

173-
def onUnschedulableTaskSetRemoved(self, task_set):
174+
def onUnschedulableTaskSetRemoved(self, task_set): # noqa: N802
174175
pass
175176

176177
# no cover: stop
177178
class Java:
178-
implements = ["org.apache.spark.scheduler.SparkListenerInterface"]
179+
implements = ["org.apache.spark.scheduler.SparkListenerInterface"] # noqa: RUF012

onetl/_metrics/listener/execution.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def __str__(self):
1818
return self.value
1919

2020

21-
class SparkSQLMetricNames(str, Enum): # noqa: WPS338
21+
class SparkSQLMetricNames(str, Enum):
2222
# Metric names passed to SQLMetrics.createMetric(...)
2323
# But only those we're interested in.
2424

@@ -60,10 +60,7 @@ class SparkListenerExecution:
6060

6161
@property
6262
def jobs(self) -> list[SparkListenerJob]:
63-
result = []
64-
for job_id in sorted(self._jobs.keys()):
65-
result.append(self._jobs[job_id])
66-
return result
63+
return [self._jobs[job_id] for job_id in sorted(self._jobs.keys())]
6764

6865
def on_execution_start(self, event):
6966
# https://github.com/apache/spark/blob/v3.5.7/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L44-L58

onetl/_metrics/listener/job.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ class SparkListenerJob:
3131

3232
@property
3333
def stages(self) -> list[SparkListenerStage]:
34-
result = []
35-
for stage_id in sorted(self._stages.keys()):
36-
result.append(self._stages[stage_id])
37-
return result
34+
return [self._stages[stage_id] for stage_id in sorted(self._stages.keys())]
3835

3936
@classmethod
4037
def create(cls, event):
@@ -50,7 +47,7 @@ def create(cls, event):
5047
stage_ids = scala_seq_to_python_list(event.stageIds())
5148
stage_infos = scala_seq_to_python_list(event.stageInfos())
5249
for stage_id, stage_info in zip(stage_ids, stage_infos):
53-
result._stages[stage_id] = SparkListenerStage.create(stage_info) # noqa: WPS437
50+
result._stages[stage_id] = SparkListenerStage.create(stage_info)
5451

5552
return result
5653

0 commit comments

Comments (0)