Skip to content

Commit b6eaf20

Browse files
authored
Merge pull request #89 from MSDLLCpapers/mcminn-test
Test pull request from Mcminn-test branch
2 parents 47fa89c + bb4aed6 commit b6eaf20

File tree

7 files changed

+1496
-42
lines changed

7 files changed

+1496
-42
lines changed
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"sys.path.insert(0, '../')\n",
11+
"\n",
12+
"print(sys.path)"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": null,
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"import obsidian\n",
22+
"print(f'obsidian version: ' + obsidian.__version__)\n",
23+
"\n",
24+
"from obsidian.experiment import AdvExpDesigner"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"# Define continuous parameters: key -> (low, high, step)\n",
34+
"\n",
35+
"continuous_params = {\n",
36+
" 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n",
37+
" 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n",
38+
" 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
39+
" 'time': (10, 1000, 'logarithmic') # Logarithmic steps (powers of 10) between 10 and 1000\n",
40+
"}\n",
41+
"\n",
42+
"# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, 'subparams': ([values], [frequencies])}}\n",
43+
"\n",
44+
"conditional_subparameters = {\n",
45+
" 'buffer_type': {\n",
46+
" 'A': {'freq': 0.4, 'pH': ([6.0, 7.0, 8.0], [0.3, 0.4, 0.3])},\n",
47+
" 'B': {'freq': 0.35, 'pH': ([5.0, 6.5], [0.7, 0.3])},\n",
48+
" 'C': {'freq': 0.25, 'pH': ([7.5, 8.5], [0.6, 0.4])}\n",
49+
" },\n",
50+
" 'catalyst': {\n",
51+
" 'X': {'freq': 0.5, 'loading': ([0.1, 0.2, 0.3], [0.2, 0.5, 0.3])},\n",
52+
" 'Y': {'freq': 0.3, 'loading': ([0.05, 0.15], [0.6, 0.4])},\n",
53+
" 'Z': {'freq': 0.2, 'loading': ([0.25, 0.35], [0.7, 0.3])}\n",
54+
" }\n",
55+
"}\n",
56+
"\n",
57+
"\n",
58+
"# Initialize the designer\n",
59+
"\n",
60+
"designer = AdvExpDesigner(continuous_params, conditional_subparameters)"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": null,
66+
"metadata": {},
67+
"outputs": [],
68+
"source": [
69+
"# Generate a design with 100 samples, optimizing categorical assignments\n",
70+
"design = designer.generate_design(seed=123, n_samples=100, optimize_categories=True)\n",
71+
"design"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"# Evaluate the design quality metrics\n",
81+
"metrics = designer.evaluate_design(design)\n",
82+
"print(\"Design quality metrics:\")\n",
83+
"for metric, value in metrics.items():\n",
84+
" print(f\" {metric}: {value:.4f}\")"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": null,
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
"# Plot histograms of all parameters and subparameters\n",
94+
"designer.plot_histograms(design)"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": null,
100+
"metadata": {},
101+
"outputs": [],
102+
"source": [
103+
"# Plot PCA colored by 'buffer_type'\n",
104+
"designer.plot_pca(design, hue='buffer_type')\n",
105+
"\n",
106+
"# Plot UMAP colored by 'catalyst'\n",
107+
"designer.plot_umap(design, hue='catalyst')"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"# Optimize design over 30 trials with 100 samples each\n",
117+
"best_design, metrics_df = designer.optimize_design(n_trials=30, n_samples=100)\n",
118+
"\n",
119+
"print(\"\\nBest design metrics after optimization:\")\n",
120+
"print(metrics_df.sort_values('score', ascending=False).head(1))\n"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": null,
126+
"metadata": {},
127+
"outputs": [],
128+
"source": [
129+
"# Plot quality evolution over trials\n",
130+
"designer.plot_quality_evolution(metrics_df)"
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": null,
136+
"metadata": {},
137+
"outputs": [],
138+
"source": [
139+
"# Plot correlation matrix of the design\n",
140+
"\n",
141+
"designer.plot_correlation(best_design)"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": null,
147+
"metadata": {},
148+
"outputs": [],
149+
"source": [
150+
"# Extend the best design by 20 new samples over 10 trials\n",
151+
"extended_design, extension_summary = designer.extend_design(best_design, n=20, n_trials=10)\n",
152+
"\n",
153+
"print(\"\\nExtension summary:\")\n",
154+
"print(extension_summary)\n",
155+
"\n",
156+
"# Plot the extended design\n",
157+
"designer.plot_histograms(extended_design)"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": null,
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"# Compare empirical vs expected frequencies for categorical variables\n",
167+
"designer.compare_frequencies(extended_design)"
168+
]
169+
}
170+
],
171+
"metadata": {
172+
"kernelspec": {
173+
"display_name": "obsidian",
174+
"language": "python",
175+
"name": "python3"
176+
},
177+
"language_info": {
178+
"codemirror_mode": {
179+
"name": "ipython",
180+
"version": 3
181+
},
182+
"file_extension": ".py",
183+
"mimetype": "text/x-python",
184+
"name": "python",
185+
"nbconvert_exporter": "python",
186+
"pygments_lexer": "ipython3",
187+
"version": "3.10.14"
188+
}
189+
},
190+
"nbformat": 4,
191+
"nbformat_minor": 4
192+
}

obsidian/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""obsidian: Automated experiment design and black-box optimization"""
2-
__version__ = '0.8.6'
2+
__version__ = '0.8.6-test'
33

44
# Import key objects
55
from obsidian.campaign import Campaign
@@ -10,6 +10,7 @@
1010
# Ensure that other subpackages are imported properly for documentation
1111
from obsidian.objectives import Objective
1212
from obsidian.experiment import ExpDesigner
13+
from obsidian.experiment import AdvExpDesigner
1314
import obsidian.constraints as constraints
1415
import obsidian.exceptions as exceptions
1516
import obsidian.acquisition as acquisition
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Parameters of scipy.stats.qmc.LatinHypercube
2+
3+
The `LatinHypercube` class generates Latin Hypercube Samples (LHS) in a multi-dimensional unit hypercube. It supports several parameters to control the sampling behavior, randomness, and sample quality.
4+
5+
## Parameters
6+
7+
### 1. `d` : int
8+
9+
**Description:**
10+
The dimension of the sampling space, i.e., the number of parameters or variables to sample simultaneously.
11+
12+
**Possible values:**
13+
Any positive integer (d > 0).
14+
15+
**Effect:**
16+
Determines the number of columns in the sample matrix. Each sample point is a vector of length d with values in the half-open interval [0, 1).
17+
18+
### 2. `seed` : int, array_like, np.random.Generator, or None, optional (default: None)
19+
20+
**Description:**
21+
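Controls the random number generator used for sampling and scrambling. Note: in recent SciPy releases (1.15+) the preferred keyword is `rng`; `seed` is still accepted for backward compatibility.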
22+
23+
**Possible values:**
24+
- An integer seed for reproducibility.
25+
- An instance of `np.random.Generator` for custom RNG.
26+
- An array-like seed.
27+
- `None` to use the default RNG.
28+
29+
**Effect:**
30+
Using a fixed seed ensures reproducible sampling results. Different seeds produce different sample sets.
31+
32+
### 3. `scramble` : bool, optional (default: True)
33+
34+
**Description:**
35+
Whether to apply scrambling to the Latin Hypercube design.
36+
37+
**Possible values:**
38+
- `True`: Apply scrambling.
39+
- `False`: No scrambling; each sample is centered within its cell of the grid.
40+
41+
**Effect:**
42+
With scrambling, each sample is placed at a random position within its cell of the stratification grid; without it, each sample sits at the center of its cell. The stratification property of LHS is preserved in both cases.
43+
44+
### 4. `strength` : int, optional (default: 1)
45+
46+
**Description:**
47+
The strength of the orthogonal array used to construct the LHS.
48+
49+
**Possible values:**
50+
- `1`: Standard Latin Hypercube (default).
51+
- `2`: Orthogonal-array-based LHS of strength 2, which additionally enforces uniformity on two-dimensional projections of the sample points. Values other than 1 and 2 are not supported.
52+
53+
**Effect:**
54+
Using `strength=2` improves uniformity in two-dimensional projections of the sample, but it requires the number of samples to be `n = p**2` for a prime number `p` and the dimension to satisfy `d <= p + 1`.
55+
56+
### 5. `optimization` : str or None, optional (default: None)
57+
58+
**Description:**
59+
Method used to optimize the LHS design to improve space-filling properties.
60+
61+
**Possible values:**
62+
- `'random-cd'`: Random coordinate descent optimization.
63+
- `'lloyd'`: Lloyd-Max relaxation of the sample points (available from SciPy 1.10).
64+
- Note: `'centered'` and `'maximin'` are not accepted values; to obtain a centered design, pass `scramble=False` instead.
65+
- `None`: No optimization applied.
66+
67+
**Effect:**
68+
Optimization attempts to improve the distribution of points by reducing clustering and increasing uniformity. Different methods have different computational costs and effectiveness:
69+
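- `'random-cd'`: Iteratively applies random permutations of coordinates and keeps the design with the lowest centered discrepancy.
70+
- `'lloyd'`: Perturbs the samples with a modified Lloyd-Max algorithm; the process converges toward equally spaced samples.
71+
- Centered placement (points at the center of each interval) is controlled by `scramble=False`, not by `optimization`; a maximin criterion is not offered by this class.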
72+
73+
## Summary Table
74+
75+
| Parameter | Type | Default | Possible Values | Effect Summary |
76+
|-----------|------|---------|-----------------|----------------|
77+
| `d` | int | — (required) | Positive integers | Number of dimensions sampled |
78+
| `seed` | int, array_like, RNG, None | `None` | Integer seed, RNG, or `None` | Controls reproducibility of samples |
79+
| `scramble` | bool | `True` | `True` or `False` | `True` places samples randomly within their cells; `False` centers them in their cells |
80+
| `strength` | int | `1` | 1 or 2 | Orthogonality strength; 2 improves uniformity in 2-D projections but requires n = p² samples (p prime) and d ≤ p + 1 |
81+
| `optimization` | str or None | `None` | `'random-cd'`, `'lloyd'`, or `None` | Optimizes sample distribution for better space-filling |
82+
83+
## Notes
84+
85+
### Choosing `scramble`:
86+
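Scrambling is enabled by default and is generally recommended. Use `scramble=False` when you want each point centered in its cell; the assignment of points to cells is still random, so fix `seed` if you need a repeatable design.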
87+
88+
### Choosing `strength`:
89+
Use `strength=1` for standard LHS. `strength=2` improves uniformity in two-dimensional projections but restricts the sample size to `n = p**2` (with `p` prime) and the dimension to `d <= p + 1`.
90+
91+
### Choosing `optimization`:
92+
Optimization improves sample uniformity but increases computation time. `'random-cd'` is a good balance for many applications.
93+
94+
### Reproducibility:
95+
Always set `seed` if you want reproducible results, especially when using scrambling or optimization.

obsidian/experiment/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from .design import *
44
from .simulator import *
55
from .utils import *
6+
from .advanced_design import *

0 commit comments

Comments
 (0)