|
| 1 | +# Copyright Materialize, Inc. and contributors. All rights reserved. |
| 2 | +# |
| 3 | +# Use of this software is governed by the Business Source License |
| 4 | +# included in the LICENSE file at the root of this repository. |
| 5 | +# |
| 6 | +# As of the Change Date specified in that file, in accordance with |
| 7 | +# the Business Source License, use of this software will be governed |
| 8 | +# by the Apache License, Version 2.0. |
| 9 | + |
| 10 | +""" |
| 11 | +Shared code for running SQLancer and SQLancer++ against Materialize. |
| 12 | +
|
| 13 | +SQLancer is an automated testing tool to find logic bugs in database systems: |
| 14 | +- SQLancer: https://github.com/sqlancer/sqlancer |
| 15 | +- SQLancer++: https://github.com/def-/sqlancerplusplus |
| 16 | +""" |
| 17 | + |
| 18 | +import argparse |
| 19 | +import random |
| 20 | +import re |
| 21 | +import shutil |
| 22 | +import subprocess |
| 23 | +from collections.abc import Callable |
| 24 | +from pathlib import Path |
| 25 | +from threading import Thread |
| 26 | + |
| 27 | +from materialize import buildkite, spawn |
| 28 | +from materialize.mzcompose.composition import ( |
| 29 | + Composition, |
| 30 | + WorkflowArgumentParser, |
| 31 | +) |
| 32 | +from materialize.mzcompose.service import Service |
| 33 | +from materialize.mzcompose.services.materialized import Materialized |
| 34 | + |
# Regular expressions for query errors that are expected while fuzzing.
# check_query_errors() applies each one with re.search() to every log line
# containing "ERROR"; a line matched by any pattern is not reported.
# Keep each pattern listed only once.
IGNORED_ERROR_PATTERNS = [
    r"Datum::unwrap_(dict|range|list|string) called on",  # TODO: Remove when https://github.com/MaterializeInc/database-issues/issues/10064 is fixed
    r"Can't union types",  # TODO: Remove when https://github.com/MaterializeInc/database-issues/issues/10052 is fixed
    # The negative lookahead keeps lines that mention "panic" reportable.
    r"^(?!.*panic).*not yet supported",
    r"does not exist",
    r"must have",
    r"overflow",
    r"invalid input",
    r"cannot be matched",
    r"implicitly casting",
    r"is not unique",
    r"invalid digit",
    r"is not defined",
    r"is defined",
    r"unterminated",
    r"Expected ",
    r"is out of range",
    r"of a negative",
    r"does not support",
    r"ANALYZE",
    r"is of type",
    r"must appear in the",
    r"negative substring length",
    r"is ambiguous",
    r"out of range",
    r"division by zero",
    r"is only defined for finite",
    r"cannot canonicalize predicates that are not of type",
    r"unexpected character in input",
    r"could not determine polymorphic type",
    r"cannot reference pseudo type",
    r"string is not a valid identifier",
    r"invalid regular expression",
    r" violates not-null constraint",
    r"could not convert type",
    r"are not allowed",
    r"requires a record",
    r"unrecognized privilege type",
    r"cannot cast",
    r"requires an OVER",
    r"value too long",
    r"may only refer to user-defined",
    r"cannot materialize call to",
    r"invalid hash algorithm",
    r"expected id",
    r"expected exactly one statement",
    r"regex parse error",
    r"invalid encoding name",
    r"must specify at least one capture group",
    r"requires a string literal",
    r"too large for encoding",
    r"must be a positive integer",
    r"bound must be less than",
    r"not recognized",
    r"invalid escape string",
    r"canceling statement due to statement timeout",
    r"invalid time zone",
    r"result exceeds max size",
    r"calls to mz_now in write statements",
    r"out of valid range",
    r"more than one record produced in subquery",
    r"input of anonymous composite types is not implemented",
    r"lists must all be the same length",
    r"invalid IANA Time Zone Database identifier",
    r"unknown schema",
    r"octal escapes are not supported",
    r"attempt to create relation with too many columns",
    r"column notation applied to type text",
    r"Unexpected EOF",
    r"missing required exponent",
    r"invalid unicode escape",
    r"dimension array or low bound array must not be null",
    r"LIKE pattern exceeds maximum length",
    r"cannot return complex numbers",
    r"must be greater than zero",
    r"null character not permitted",
    r"invalid datepart",
    r"must use value within",
    r"expressions must appear in select list",
    r"expressions must match initial",
    r"invalid selection: operation may only",
    r"array size exceeds the maximum allowed",
    r"does not allow subqueries",
]
| 120 | + |
| 121 | + |
def check_query_errors(logs_dir: Path) -> list[str]:
    """Collect ERROR lines from log files that no ignore pattern explains.

    Every ``*.log`` file directly inside ``logs_dir`` is scanned line by line.
    A line is reported when it contains the substring ``ERROR`` and none of
    the regular expressions in ``IGNORED_ERROR_PATTERNS`` matches it.

    Args:
        logs_dir: Directory holding the log files to scan.

    Returns:
        One ``"<file name>: <line>"`` entry per unexpected error line. Files
        are visited in sorted order so the report is deterministic.
    """
    unexpected_errors: list[str] = []

    for log_file in sorted(logs_dir.glob("*.log")):
        try:
            # Decode leniently: a single undecodable byte must not cause the
            # whole file (and every error in it) to be skipped.
            content = log_file.read_text(errors="replace")
        except OSError as e:
            # Stay best-effort, but make the gap in coverage visible.
            print(f"Warning: could not read log file {log_file}: {e}")
            continue

        for line in content.splitlines():
            if "ERROR" not in line:
                continue
            if any(re.search(pattern, line) for pattern in IGNORED_ERROR_PATTERNS):
                continue
            unexpected_errors.append(f"{log_file.name}: {line}")

    return unexpected_errors
| 146 | + |
| 147 | + |
def create_services(service_name: str) -> list[Service | Materialized]:
    """Build the service list for a SQLancer or SQLancer++ composition.

    Args:
        service_name: Used both as the name of the test-runner service and as
            the value of its ``mzbuild`` key.

    Returns:
        A two-element list: the ``Materialized`` service followed by the
        ``Service`` named ``service_name``.
    """
    system_parameter_defaults = {
        "enable_alter_table_add_column": "true",
        "enable_statement_lifecycle_logging": "false",
        "enable_internal_statement_logging": "false",
        "statement_logging_default_sample_rate": "0",
        "statement_logging_max_sample_rate": "0",
        "enable_repeat_row": "true",
        "enable_list_length_max": "true",
        "enable_list_n_layers": "true",
        "enable_time_at_time_zone": "true",
        "enable_date_bin_hopping": "true",
    }

    # Auto-restart so we can keep testing even after we ran into a panic
    materialized = Materialized(
        restart="on-failure",
        default_replication_factor=1,
        additional_system_parameter_defaults=system_parameter_defaults,
    )
    runner = Service(service_name, {"mzbuild": service_name})

    return [materialized, runner]
| 175 | + |
| 176 | + |
def _print_logs(container_id: str) -> None:
    """Run ``docker logs -f`` for ``container_id`` through ``spawn.runv``.

    Used as the target of the background thread started by ``run_sqlancer``.
    """
    follow_logs_cmd = ["docker", "logs", "-f", container_id]
    spawn.runv(follow_logs_cmd)
| 179 | + |
| 180 | + |
def run_sqlancer(
    c: Composition,
    parser: WorkflowArgumentParser,
    *,
    service_name: str,
    default_oracle: str,
    build_run_args: Callable[[argparse.Namespace, int], list[str]],
    docker_logs_path: str,
    log_prefix: str,
) -> None:
    """Run a SQLancer-style service against Materialize and check its logs.

    Steps: register and parse the workflow arguments, bring up
    ``materialized`` and raise three object limits, start ``service_name``
    detached, stream and capture its Docker logs, copy the log directory out
    of the container, upload it as an artifact, and fail on query errors not
    covered by ``IGNORED_ERROR_PATTERNS``.

    Args:
        c: Composition used to start services and run SQL.
        parser: Workflow argument parser; ``--runtime``, ``--num-tries``,
            ``--num-threads``, ``--seed``, ``--qpg`` and ``--oracle`` are
            registered on it here.
        service_name: Name of the service to run.
        default_oracle: Default value for ``--oracle``.
        build_run_args: Called with the parsed arguments and the seed; its
            result is passed as the arguments of the ``c.run`` invocation.
        docker_logs_path: Path inside the container that is copied to the
            local ``logs`` directory with ``docker cp``.
        log_prefix: Label placed in the ``--- [...]`` header printed for each
            ``--java.lang.`` line found in the captured output.

    Raises:
        Exception: If ``check_query_errors`` reports any unexpected errors.
    """
    parser.add_argument("--runtime", default=600, type=int)
    parser.add_argument("--num-tries", default=100000, type=int)
    parser.add_argument("--num-threads", default=16, type=int)
    parser.add_argument("--seed", default=None, type=int)
    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
    parser.add_argument("--oracle", default=default_oracle, type=str)
    args = parser.parse_args()

    c.up("materialized")

    for limit in ("max_databases", "max_tables", "max_materialized_views"):
        c.sql(
            f"ALTER SYSTEM SET {limit} TO 1000",
            user="mz_system",
            port=6877,
        )

    # Compare against None explicitly: `args.seed or ...` would silently
    # replace an explicitly requested `--seed 0` with a random seed.
    seed = args.seed if args.seed is not None else random.randint(0, 2**31)

    run_args = build_run_args(args, seed)

    print("--- Run in progress")
    result = c.run(
        service_name,
        *run_args,
        check=False,
        detach=True,
        capture=True,
    )
    container_id = result.stdout.strip()

    # Print logs in a background thread so that we get immediate output in CI,
    # and also when running SQLancer locally
    thread = Thread(target=_print_logs, args=(container_id,))
    thread.start()
    # At the same time capture the logs to analyze for finding new issues
    stdout = spawn.capture(
        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
    )
    # Let the streaming thread finish before printing the analysis below, so
    # its output cannot interleave with the summary sections.
    thread.join()

    in_assertion = False
    for line in stdout.splitlines():
        # Lines mentioning these exceptions are dropped without touching the
        # current section state.
        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
            continue
        if line.startswith("--java.lang."):
            # Start of a reported Java exception: open a new log section.
            in_assertion = True
            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
        elif line == "":
            # A blank line ends the current exception section.
            in_assertion = False
        elif in_assertion:
            print(line)
    # NOTE(review): result.stdout comes from the detached `c.run` call, the
    # same value container_id is derived from, so this prints that value
    # rather than the last line of the captured logs -- confirm intent.
    print(f"--- {result.stdout.splitlines()[-1]}")

    # Check for unexpected query errors in the logs
    logs = Path("logs")
    # Remove logs left over from a previous run before copying new ones.
    if logs.exists() and logs.is_dir():
        shutil.rmtree(logs)
    spawn.runv(
        [
            "docker",
            "cp",
            f"{container_id}:{docker_logs_path}",
            str(logs),
        ]
    )
    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
    buildkite.upload_artifact("logs.tar.gz")
    unexpected_errors = check_query_errors(logs)
    if unexpected_errors:
        print("--- Unexpected query errors found:")
        for error in unexpected_errors:
            print(error)
        raise Exception(
            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
        )
0 commit comments