Skip to content

Commit d475659

Browse files
authored
Show error locations in other modules on warm runs (#20635)
Fixes #4772 This also allows to remove all but one `_no_parallel` test suffixes. I decided to keep the last one (caused by an existing inconsistent behaviour in `partial` plugin) for now. I am still fascinated by how non-trivial this seemingly simple fix is. The key idea is to split the error tuple generation in two phases: first phase can produce relative error locations (as symbol full names) and those are serialized to cache. We then resolve relative locations in the second phase (which may potentially require re-parsing some ASTs that were already discarded, I cache these globally per-run). Couple missing things in the PR: * Only AST nodes that are also `SymbolNode` are supported (so e.g. assignments are not yet supported). * Using relative locations in blocker errors is not supported (they use a different code path). Both are fine for now, since neither of two cases are actually used, we can support those later when/if needed.
1 parent 1288f20 commit d475659

17 files changed

+441
-32
lines changed

mypy/build.py

Lines changed: 109 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,32 @@
8383
WORKER_START_TIMEOUT,
8484
)
8585
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
86-
from mypy.errors import CompileError, ErrorInfo, Errors, ErrorTuple, report_internal_error
86+
from mypy.errors import (
87+
CompileError,
88+
ErrorInfo,
89+
Errors,
90+
ErrorTuple,
91+
ErrorTupleRaw,
92+
report_internal_error,
93+
)
8794
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
8895
from mypy.indirection import TypeIndirectionVisitor
8996
from mypy.ipc import BadStatus, IPCClient, IPCMessage, read_status, ready_to_read, receive, send
9097
from mypy.messages import MessageBuilder
91-
from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable
98+
from mypy.nodes import (
99+
ClassDef,
100+
Context,
101+
Import,
102+
ImportAll,
103+
ImportBase,
104+
ImportFrom,
105+
MypyFile,
106+
SymbolTable,
107+
)
92108
from mypy.partially_defined import PossiblyUndefinedVariableVisitor
93109
from mypy.semanal import SemanticAnalyzer
94110
from mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis
111+
from mypy.traverser import find_definitions
95112
from mypy.util import (
96113
DecodeError,
97114
decode_python_encoding,
@@ -866,6 +883,10 @@ def __init__(
866883
self.queue_order: int = 0
867884
# Is this an instance used by a parallel worker?
868885
self.parallel_worker = parallel_worker
886+
# Cache for ASTs created during error message generation. Note these are
887+
# raw parsed trees not analyzed with mypy. We use these to find absolute
888+
# location of a symbol used as a location for an error message.
889+
self.extra_trees: dict[str, MypyFile] = {}
869890

870891
def dump_stats(self) -> None:
871892
if self.options.dump_build_stats:
@@ -1028,6 +1049,60 @@ def report_file(
10281049
if self.reports is not None and self.source_set.is_source(file):
10291050
self.reports.file(file, self.modules, type_map, options)
10301051

1052+
def resolve_location(self, graph: dict[str, State], fullname: str) -> Context | None:
1053+
"""Resolve an absolute location of a symbol with given fullname."""
1054+
rest = []
1055+
head = fullname
1056+
while True:
1057+
# TODO: this mimics the logic in lookup.py but it is actually wrong.
1058+
# This is because we don't distinguish between submodule and a local symbol
1059+
# with the same name.
1060+
head, tail = head.rsplit(".", maxsplit=1)
1061+
rest.append(tail)
1062+
if head in graph:
1063+
state = graph[head]
1064+
break
1065+
if "." not in head:
1066+
return None
1067+
*prefix, name = reversed(rest)
1068+
# If this happens something is wrong, but it is better to give slightly
1069+
# less helpful error message than crash.
1070+
if state.path is None:
1071+
return None
1072+
if state.tree is not None and state.tree.defs:
1073+
# We usually free ASTs after processing, but reuse an existing AST if
1074+
# it is still available.
1075+
tree = state.tree
1076+
elif state.id in self.extra_trees:
1077+
tree = self.extra_trees[state.id]
1078+
else:
1079+
if state.source is not None:
1080+
# Sources are usually discarded after processing as well, check
1081+
# if we still have one just in case.
1082+
source = state.source
1083+
else:
1084+
path = state.manager.maybe_swap_for_shadow_path(state.path)
1085+
source = decode_python_encoding(state.manager.fscache.read(path))
1086+
tree = parse(source, state.path, state.id, state.manager.errors, state.options)
1087+
# TODO: run first pass of semantic analysis on freshly parsed trees,
1088+
# we need this to get correct reachability information.
1089+
self.extra_trees[state.id] = tree
1090+
statements = tree.defs
1091+
while prefix:
1092+
part = prefix.pop(0)
1093+
for statement in statements:
1094+
defs = find_definitions(statement, part)
1095+
if not defs or not isinstance((defn := defs[0]), ClassDef):
1096+
continue
1097+
statements = defn.defs.body
1098+
break
1099+
else:
1100+
return None
1101+
for statement in statements:
1102+
if defs := find_definitions(statement, name):
1103+
return defs[0]
1104+
return None
1105+
10311106
def verbosity(self) -> int:
10321107
return self.options.verbosity
10331108

@@ -2359,7 +2434,7 @@ def load_tree(self, temporary: bool = False) -> None:
23592434

23602435
def fix_cross_refs(self) -> None:
23612436
assert self.tree is not None, "Internal error: method must be called on parsed file only"
2362-
# We need to set allow_missing when doing a fine grained cache
2437+
# We need to set allow_missing when doing a fine-grained cache
23632438
# load because we need to gracefully handle missing modules.
23642439
fixup_module(self.tree, self.manager.modules, self.options.use_fine_grained_cache)
23652440

@@ -3558,7 +3633,9 @@ def find_stale_sccs(
35583633
path = manager.errors.simplify_path(graph[id].xpath)
35593634
formatted = manager.errors.format_messages(
35603635
path,
3561-
deserialize_codes(graph[id].error_lines),
3636+
transform_error_tuples(
3637+
manager, graph, deserialize_codes(graph[id].error_lines)
3638+
),
35623639
formatter=manager.error_formatter,
35633640
)
35643641
manager.flush_errors(path, formatted, False)
@@ -3813,7 +3890,9 @@ def process_stale_scc(
38133890
if graph[id].xpath not in manager.errors.ignored_files:
38143891
errors = manager.errors.file_messages(graph[id].xpath)
38153892
formatted = manager.errors.format_messages(
3816-
graph[id].xpath, errors, formatter=manager.error_formatter
3893+
graph[id].xpath,
3894+
transform_error_tuples(manager, graph, errors),
3895+
formatter=manager.error_formatter,
38173896
)
38183897
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), formatted, False)
38193898
errors_by_id[id] = errors
@@ -3972,14 +4051,37 @@ def write_undocumented_ref_info(
39724051
metastore.write(ref_info_file, json_dumps(deps_json))
39734052

39744053

3975-
def serialize_codes(errs: list[ErrorTuple]) -> list[SerializedError]:
4054+
def transform_error_tuples(
4055+
manager: BuildManager, graph: dict[str, State], error_tuples_rel: list[ErrorTupleRaw]
4056+
) -> list[ErrorTuple]:
4057+
"""Transform raw error tuples by resolving relative error locations."""
4058+
error_tuples = []
4059+
for e in error_tuples_rel:
4060+
file, line_rel, column, end_line, end_column, severity, message, code = e
4061+
if isinstance(line_rel, int):
4062+
line = line_rel
4063+
else:
4064+
assert file is not None
4065+
loc = manager.resolve_location(graph, line_rel)
4066+
if loc is not None:
4067+
line = loc.line
4068+
column = loc.column
4069+
end_line = loc.end_line or -1
4070+
end_column = loc.end_column or -1
4071+
else:
4072+
line = -1
4073+
error_tuples.append((file, line, column, end_line, end_column, severity, message, code))
4074+
return error_tuples
4075+
4076+
4077+
def serialize_codes(errs: list[ErrorTupleRaw]) -> list[SerializedError]:
39764078
return [
39774079
(path, line, column, end_line, end_column, severity, message, code.code if code else None)
39784080
for path, line, column, end_line, end_column, severity, message, code in errs
39794081
]
39804082

39814083

3982-
def deserialize_codes(errs: list[SerializedError]) -> list[ErrorTuple]:
4084+
def deserialize_codes(errs: list[SerializedError]) -> list[ErrorTupleRaw]:
39834085
return [
39844086
(
39854087
path,

mypy/cache.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
# High-level cache layout format
7272
CACHE_VERSION: Final = 1
7373

74-
SerializedError: _TypeAlias = tuple[str | None, int, int, int, int, str, str, str | None]
74+
SerializedError: _TypeAlias = tuple[str | None, int | str, int, int, int, str, str, str | None]
7575

7676

7777
class CacheMeta:
@@ -479,7 +479,10 @@ def write_errors(data: WriteBuffer, errs: list[SerializedError]) -> None:
479479
for path, line, column, end_line, end_column, severity, message, code in errs:
480480
write_tag(data, TUPLE_GEN)
481481
write_str_opt(data, path)
482-
write_int(data, line)
482+
if isinstance(line, str):
483+
write_str(data, line)
484+
else:
485+
write_int(data, line)
483486
write_int(data, column)
484487
write_int(data, end_line)
485488
write_int(data, end_column)
@@ -493,10 +496,17 @@ def read_errors(data: ReadBuffer) -> list[SerializedError]:
493496
result = []
494497
for _ in range(read_int_bare(data)):
495498
assert read_tag(data) == TUPLE_GEN
499+
path = read_str_opt(data)
500+
tag = read_tag(data)
501+
if tag == LITERAL_STR:
502+
line: str | int = read_str_bare(data)
503+
else:
504+
assert tag == LITERAL_INT
505+
line = read_int_bare(data)
496506
result.append(
497507
(
498-
read_str_opt(data),
499-
read_int(data),
508+
path,
509+
line,
500510
read_int(data),
501511
read_int(data),
502512
read_int(data),

mypy/errors.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def __init__(
130130
target: str | None = None,
131131
priority: int = 0,
132132
parent_error: ErrorInfo | None = None,
133+
location_ref: str | None = None,
133134
) -> None:
134135
self.import_ctx = import_ctx
135136
self.file = file
@@ -151,12 +152,18 @@ def __init__(
151152
if parent_error is not None:
152153
assert severity == "note", "Only notes can specify parent errors"
153154
self.parent_error = parent_error
155+
self.location_ref = location_ref
154156

155157

156158
# Type used internally to represent errors:
157159
# (path, line, column, end_line, end_column, severity, message, code)
158160
ErrorTuple: _TypeAlias = tuple[str | None, int, int, int, int, str, str, ErrorCode | None]
159161

162+
# A raw version of the above that can refer to either absolute or relative location.
163+
# If the location is relative, the first item (line) is a string with a symbol fullname,
164+
# and three other values (column, end_line, end_column) are set to -1.
165+
ErrorTupleRaw: _TypeAlias = tuple[str | None, int | str, int, int, int, str, str, ErrorCode | None]
166+
160167

161168
class ErrorWatcher:
162169
"""Context manager that can be used to keep track of new errors recorded
@@ -569,6 +576,7 @@ def report(
569576
end_line: int | None = None,
570577
end_column: int | None = None,
571578
parent_error: ErrorInfo | None = None,
579+
location_ref: str | None = None,
572580
) -> ErrorInfo:
573581
"""Report message at the given line using the current error context.
574582
@@ -635,6 +643,7 @@ def report(
635643
origin=(self.file, origin_span),
636644
target=self.current_target(),
637645
parent_error=parent_error,
646+
location_ref=location_ref,
638647
)
639648
self.add_error_info(info)
640649
return info
@@ -1014,6 +1023,8 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn:
10141023
"""
10151024
# self.new_messages() will format all messages that haven't already
10161025
# been returned from a file_messages() call.
1026+
# TODO: pass resolve_location callback here.
1027+
# This will be needed if we are going to use relative locations in blocker errors.
10171028
raise CompileError(
10181029
self.new_messages(), use_stdout=use_stdout, module_with_blocker=self.blocker_module()
10191030
)
@@ -1076,7 +1087,7 @@ def format_messages_default(
10761087
a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker)
10771088
return a
10781089

1079-
def file_messages(self, path: str) -> list[ErrorTuple]:
1090+
def file_messages(self, path: str) -> list[ErrorTupleRaw]:
10801091
"""Return an error tuple list of new error messages from a given file."""
10811092
if path not in self.error_info_map:
10821093
return []
@@ -1119,7 +1130,9 @@ def find_shadow_file_mapping(self, path: str) -> str | None:
11191130
return i[1]
11201131
return None
11211132

1122-
def new_messages(self) -> list[str]:
1133+
def new_messages(
1134+
self, resolve_location: Callable[[str], Context | None] | None = None
1135+
) -> list[str]:
11231136
"""Return a string list of new error messages.
11241137
11251138
Use a form suitable for displaying to the user.
@@ -1129,7 +1142,29 @@ def new_messages(self) -> list[str]:
11291142
msgs = []
11301143
for path in self.error_info_map.keys():
11311144
if path not in self.flushed_files:
1132-
error_tuples = self.file_messages(path)
1145+
error_tuples_rel = self.file_messages(path)
1146+
error_tuples = []
1147+
for e in error_tuples_rel:
1148+
# This has a bit of code duplication with build.py, but it is hard
1149+
# to avoid without either an import cycle or a performance penalty.
1150+
file, line_rel, column, end_line, end_column, severity, message, code = e
1151+
if isinstance(line_rel, int):
1152+
line = line_rel
1153+
elif resolve_location is not None:
1154+
assert file is not None
1155+
loc = resolve_location(line_rel)
1156+
if loc is not None:
1157+
line = loc.line
1158+
column = loc.column
1159+
end_line = loc.end_line or -1
1160+
end_column = loc.end_column or -1
1161+
else:
1162+
line = -1
1163+
else:
1164+
line = -1
1165+
error_tuples.append(
1166+
(file, line, column, end_line, end_column, severity, message, code)
1167+
)
11331168
msgs.extend(self.format_messages(path, error_tuples))
11341169
return msgs
11351170

@@ -1141,15 +1176,15 @@ def targets(self) -> set[str]:
11411176
info.target for errs in self.error_info_map.values() for info in errs if info.target
11421177
}
11431178

1144-
def render_messages(self, errors: list[ErrorInfo]) -> list[ErrorTuple]:
1179+
def render_messages(self, errors: list[ErrorInfo]) -> list[ErrorTupleRaw]:
11451180
"""Translate the messages into a sequence of tuples.
11461181
11471182
Each tuple is of form (path, line, col, severity, message, code).
11481183
The rendered sequence includes information about error contexts.
11491184
The path item may be None. If the line item is negative, the
11501185
line number is not defined for the tuple.
11511186
"""
1152-
result: list[ErrorTuple] = []
1187+
result: list[ErrorTupleRaw] = []
11531188
prev_import_context: list[tuple[str, int]] = []
11541189
prev_function_or_member: str | None = None
11551190
prev_type: str | None = None
@@ -1224,9 +1259,21 @@ def render_messages(self, errors: list[ErrorInfo]) -> list[ErrorTuple]:
12241259
else:
12251260
result.append((file, -1, -1, -1, -1, "note", f'In class "{e.type}":', None))
12261261

1227-
result.append(
1228-
(file, e.line, e.column, e.end_line, e.end_column, e.severity, e.message, e.code)
1229-
)
1262+
if e.location_ref is not None:
1263+
result.append((file, e.location_ref, -1, -1, -1, e.severity, e.message, e.code))
1264+
else:
1265+
result.append(
1266+
(
1267+
file,
1268+
e.line,
1269+
e.column,
1270+
e.end_line,
1271+
e.end_column,
1272+
e.severity,
1273+
e.message,
1274+
e.code,
1275+
)
1276+
)
12301277

12311278
prev_import_context = e.import_ctx
12321279
prev_function_or_member = e.function_or_member

mypy/lookup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ def lookup_fully_qualified(
3232
if raise_on_missing:
3333
assert "." in head, f"Cannot find module for {name}"
3434
return None
35+
# TODO: this logic is not correct as it confuses a submodule and a local symbol.
36+
# A potential solution may be to use format like pkg.mod:Cls.method for fullname,
37+
# but this is a relatively big change.
3538
head, tail = head.rsplit(".", maxsplit=1)
3639
rest.append(tail)
3740
mod = modules.get(head)

mypy/messages.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ def span_from_context(ctx: Context) -> Iterable[int]:
266266
assert origin_span is not None
267267
origin_span = itertools.chain(origin_span, span_from_context(secondary_context))
268268

269+
location_ref = None
270+
if file is not None and file != self.errors.file:
271+
assert isinstance(context, SymbolNode), "Only symbols can be locations in other files"
272+
location_ref = context.fullname
273+
269274
return self.errors.report(
270275
context.line if context else -1,
271276
context.column if context else -1,
@@ -278,6 +283,7 @@ def span_from_context(ctx: Context) -> Iterable[int]:
278283
end_column=context.end_column if context else -1,
279284
code=code,
280285
parent_error=parent_error,
286+
location_ref=location_ref,
281287
)
282288

283289
def fail(

0 commit comments

Comments
 (0)