Skip to content

Commit 417f82f

Browse files
authored
Merge pull request #114 from OpenOptimizationOrg/feat/ga_consistency
Consistency check script
2 parents cfa8f0c + 0a0e719 commit 417f82f

File tree

4 files changed

+183
-0
lines changed

4 files changed

+183
-0
lines changed

utils/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# OPL YAML utils
2+
3+
This folder contains utility scripts for working with the YAML format used to describe problems in the context of OPL. They are mainly intended to be run automatically via GitHub Actions to make collaboration easier.
4+
5+
The intended way of adding a new problem to the repository is thus as follows:
6+
7+
* Change the [new_problem.yaml](new_problem.yaml) template file to fit the new problem.
8+
* Create a PR with the changes (for example with a fork).
9+
10+
What happens in the background then is:
11+
12+
* On PR creation and on each commit to the PR, the [validate_yaml.py](validate_yaml.py) script is run to check that the YAML file is valid and consistent. It expects the changes to be in the [new_problem.yaml](new_problem.yaml) file.
13+
* Then the PR should be reviewed manually.
14+
* When the PR is merged into the main branch, a second script (which doesn't exist yet) runs; it adds the content of [new_problem.yaml](new_problem.yaml) to the [problems.yaml](../problems.yaml) file and reverts the changes to new_problem.yaml.
15+
16+
:warning: Note that the GitHub Actions do not exist yet either; this is a WIP.
17+
18+
## validate_yaml.py
19+
20+
This script checks the new content for the following:
21+
22+
* The YAML syntax is valid and is in the expected format.
23+
* The required fields are present.
24+
* Specific fields are unique across the new set of problems (e.g. name)
25+
26+
:warning: Execute from the root of the repository. Tested with Python 3.12.
27+
28+
```bash
29+
pip install -r utils/requirements.txt
30+
python utils/validate_yaml.py utils/new_problem.yaml
31+
```

utils/new_problem.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
- name: template
2+
suite/generator/single: suite
3+
objectives: '1'
4+
dimensionality: scalable
5+
variable type: continuous
6+
constraints: 'no'
7+
dynamic: 'no'
8+
noise: 'no'
9+
multimodal: 'yes'
10+
multi-fidelity: 'no'
11+
reference: ''
12+
implementation: ''
13+
source (real-world/artificial): ''
14+
textual description: 'This is a dummy template'

utils/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pyyaml
2+
pandas

utils/validate_yaml.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import yaml
2+
3+
import sys
4+
from pathlib import Path
5+
6+
# Add parent directory to sys.path
7+
parent = Path(__file__).resolve().parent.parent
8+
sys.path.insert(0, str(parent))
9+
10+
# Now you can import normally
11+
from yaml_to_html import default_columns as REQUIRED_FIELDS
12+
13+
# Fields that are accepted in an entry without triggering the "New field added"
# warning, in addition to REQUIRED_FIELDS.
OPTIONAL_FIELDS = ["multimodal"]
14+
# Fields whose values must not repeat: neither among the entries of the file
# being validated nor against the existing problems (a clash is an error).
UNIQUE_FIELDS = ["name"]
15+
# Fields that must hold a non-blank value in every entry.
NON_EMPTY_FIELDS = ["name"]
16+
# Fields for which a clash with an existing problem only produces a warning.
UNIQUE_WARNING_FIELDS = ["reference", "implementation"]
17+
# File holding the existing problems; the path is relative, so it is resolved
# against the current working directory.
PROBLEMS_FILE = "problems.yaml"
18+
19+
20+
def read_data(filepath):
    """Load and parse a YAML file.

    Problems are reported on stdout as ``::error::`` workflow commands instead
    of being raised, so that callers can decide how to terminate.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        A ``(status, data)`` tuple: ``(0, parsed_content)`` on success, or
        ``(1, None)`` when the file is missing, cannot be read/decoded, or is
        not valid YAML. Note that an empty file parses successfully to
        ``(0, None)``.
    """
    try:
        # Pin the encoding so the result does not depend on the platform's
        # default locale encoding.
        with open(filepath, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return 0, data
    except FileNotFoundError:
        print(f"::error::File not found: {filepath}")
        return 1, None
    except (OSError, UnicodeDecodeError) as e:
        # E.g. the path is a directory, permissions are missing, or the
        # content is not UTF-8: report it instead of crashing with a traceback.
        print(f"::error::Could not read file {filepath}: {e}")
        return 1, None
    except yaml.YAMLError as e:
        print(f"::error::YAML syntax error: {e}")
        return 1, None
31+
32+
33+
def check_format(data):
    """Check the top-level structure of a parsed YAML document.

    The document must be a non-empty list whose items are all dictionaries,
    and no field listed in ``UNIQUE_FIELDS`` may hold the same value in two
    entries. Findings are printed as ``::error::`` / ``::notice::`` workflow
    commands.

    Args:
        data: The parsed YAML content (any type; ``None`` for an empty file).

    Returns:
        True if the structure is acceptable, False otherwise.
    """
    # An empty file parses to None and a top-level mapping or scalar is not a
    # list of entries; reject those explicitly instead of failing on len().
    if not isinstance(data, list):
        print("::error::YAML file should contain a list of entries at the top level.")
        return False
    if not data:
        print("::error::YAML file should contain at least one top level entry.")
        return False
    print(f"::notice::YAML file contains {len(data)} top-level entries.")

    for i, entry in enumerate(data):
        if not isinstance(entry, dict):
            print(f"::error::Entry {i} is not a dictionary.")
            return False

    for k in UNIQUE_FIELDS:
        # Entries that do not define the field are skipped: a missing value is
        # not a uniqueness violation and must not raise KeyError here.
        values = [entry[k] for entry in data if k in entry]
        try:
            has_duplicates = len(values) != len(set(values))
        except TypeError:
            # Unhashable values (e.g. a YAML list): compare pairwise instead.
            has_duplicates = any(
                value in values[:pos] for pos, value in enumerate(values)
            )
        if has_duplicates:
            print(f"::error::Field '{k}' must be unique across all entries.")
            return False
    return True
51+
52+
53+
def check_fields(data):
    """Validate the fields of a single problem entry.

    Checks, in order: every field in ``REQUIRED_FIELDS`` is present; fields
    outside ``REQUIRED_FIELDS`` and ``OPTIONAL_FIELDS`` are reported with a
    warning (not a failure); the ``name`` is no longer the placeholder
    ``"template"``; and every field in ``NON_EMPTY_FIELDS`` has a non-blank
    value. Findings are printed as workflow commands.

    Args:
        data: One entry of the YAML document, as a dictionary.

    Returns:
        True if the entry passes all checks, False on the first failing check.
    """
    missing = [field for field in REQUIRED_FIELDS if field not in data]
    if missing:
        print(f"::error::Missing required fields: {', '.join(missing)}")
        return False

    # Built once rather than per key; unpacking also works if REQUIRED_FIELDS
    # is not a list (it is imported from another module).
    known_fields = [*REQUIRED_FIELDS, *OPTIONAL_FIELDS]
    # YAML keys are not guaranteed to be strings, so stringify before joining.
    new_fields = [str(field) for field in data if field not in known_fields]
    if new_fields:
        print(f"::warning::New field added: {', '.join(new_fields)}")

    # Check that the name is not still template
    if data.get("name") == "template":
        print(
            "::error::Please change the 'name' field from 'template' to a unique name."
        )
        return False

    # Check non-empty fields. Only None and blank strings count as empty;
    # other value types (e.g. a number) are accepted rather than crashing on
    # a missing .strip() method.
    empty_fields = []
    for field in NON_EMPTY_FIELDS:
        value = data.get(field)
        if value is None or (isinstance(value, str) and not value.strip()):
            empty_fields.append(field)
    if empty_fields:
        print(
            f"::error::The following fields cannot be empty: {', '.join(empty_fields)}"
        )
        return False
    return True
81+
82+
83+
def check_novelty(data):
    """Check that an entry does not clash with the existing problems.

    The existing problems are read from ``PROBLEMS_FILE``. For every field in
    ``UNIQUE_FIELDS`` and ``UNIQUE_WARNING_FIELDS`` that has a truthy value in
    ``data``, the value is compared against the same field of each existing
    entry. A clash on a field in ``UNIQUE_WARNING_FIELDS`` prints a warning;
    a clash on any other checked field prints an error and fails the check.

    Args:
        data: One entry of the new YAML document, as a dictionary.

    Returns:
        True if no blocking clash was found, False if the existing problems
        could not be read or a unique field clashes.
    """
    # Load existing problems
    read_status, existing_data = read_data(PROBLEMS_FILE)
    if read_status != 0:
        print("::error::Could not read existing problems for novelty check.")
        return False

    # An empty problems file parses to None: nothing exists yet, so nothing
    # can clash. Non-dictionary items are ignored.
    existing_entries = [
        entry
        for entry in (existing_data if isinstance(existing_data, list) else [])
        if isinstance(entry, dict)
    ]

    for field in UNIQUE_FIELDS + UNIQUE_WARNING_FIELDS:
        value = data.get(field)
        # skip empty fields
        if not value:
            continue
        # Compare with == rather than through a set so that unhashable values
        # (e.g. a YAML list) cannot raise TypeError.
        if not any(entry.get(field) == value for entry in existing_entries):
            continue
        if field in UNIQUE_WARNING_FIELDS:
            print(
                f"::warning::Field '{field}' with value '{value}' already exists. Consider choosing a unique value."
            )
            continue
        print(
            f"::error::Field '{field}' with value '{value}' already exists. Please choose a unique value."
        )
        return False
    return True
109+
110+
111+
def validate_yaml(filepath):
    """Validate a YAML problem file and terminate the process with the result.

    Runs, in order: ``read_data``, ``check_format`` on the whole document,
    then ``check_fields`` and ``check_novelty`` on each entry, stopping at the
    first failure.

    Args:
        filepath: Path of the YAML file to validate.

    Raises:
        SystemExit: Always. Exit code 0 if every check passed, 1 otherwise;
            this function never returns normally.
    """
    status, data = read_data(filepath)
    if status != 0:
        sys.exit(1)
    # An empty file parses to None. Report that explicitly, before any check
    # touches the data, instead of relying on an assert.
    if data is None:
        print("::error::YAML file is empty.")
        sys.exit(1)
    if not check_format(data):
        sys.exit(1)

    # Iterate through each top-level entry; positions are reported 1-based.
    for position, new_data in enumerate(data, start=1):
        # Check required and unique fields
        if not check_fields(new_data) or not check_novelty(new_data):
            print(f"::error::Validation failed for entry {position}.")
            sys.exit(1)

    # YAML is valid if we reach this point
    print("YAML syntax is valid.")
    sys.exit(0)
128+
129+
130+
if __name__ == "__main__":
    # Command-line entry point: the first argument is the YAML file to
    # validate; without it, print the usage as an error and exit with 1.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("::error::Usage: python validate_yaml.py <yourfile.yaml>")
        sys.exit(1)

    validate_yaml(cli_args[0])

0 commit comments

Comments
 (0)