Skip to content

Commit a75c534

Browse files
committed
Update benchmark configuration files
1 parent fbbe486 commit a75c534

File tree

6 files changed

+133
-67
lines changed

6 files changed

+133
-67
lines changed

resources/config/benchmark/benchmark_diff.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
version: 4
1+
version: 5
22
##########
33
# Common #
44
##########
55
common:
66
# Path to the event log in CSV format
7-
train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
88
# Event log to evaluate the discovered BPS model with
9-
test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
1010
# Use observed arrival distributions
1111
use_observed_arrival_distribution: false
1212
# Specify the name for each of the columns in the CSV file (XES standard by default)
@@ -28,7 +28,7 @@ common:
2828
- arrival_event_distribution
2929
- cycle_time_distribution
3030
# Whether to discover case attributes or not
31-
discover_case_attributes: false
31+
discover_data_attributes: false
3232
#################
3333
# Preprocessing #
3434
#################
@@ -62,9 +62,7 @@ control_flow:
6262
- true
6363
- false
6464
# Whether to prioritize parallelism over loops or not
65-
prioritize_parallelism:
66-
- true
67-
- false
65+
prioritize_parallelism: true
6866
##################
6967
# Resource model #
7068
##################
@@ -83,9 +81,7 @@ resource_model:
8381
# Resource profile discovery type
8482
discovery_type: differentiated
8583
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
86-
granularity:
87-
- 15
88-
- 60
84+
granularity: 60
8985
# Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
9086
confidence:
9187
- 0.5
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
version: 5
2+
##########
3+
# Common #
4+
##########
5+
common:
6+
# Path to the event log in CSV format
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
8+
# Event log to evaluate the discovered BPS model with
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
10+
# Specify the name for each of the columns in the CSV file (XES standard by default)
11+
log_ids:
12+
case: "case_id"
13+
activity: "activity"
14+
resource: "resource"
15+
start_time: "start_time"
16+
end_time: "end_time"
17+
# Number of evaluations of the discovered BPS model
18+
num_final_evaluations: 10
19+
# Metrics to evaluate the discovered BPS model
20+
evaluation_metrics:
21+
- 3_gram_distance
22+
- 2_gram_distance
23+
- absolute_event_distribution
24+
- relative_event_distribution
25+
- circadian_event_distribution
26+
- arrival_event_distribution
27+
- cycle_time_distribution
28+
# Whether to discover data attributes or not
29+
discover_data_attributes: true
30+
#################
31+
# Preprocessing #
32+
#################
33+
preprocessing:
34+
multitasking: false
35+
enable_time_concurrency_threshold: 0.5
36+
################
37+
# Control-flow #
38+
################
39+
control_flow:
40+
# Metric to guide the optimization process (loss function to minimize)
41+
optimization_metric: two_gram_distance
42+
# Number of optimization iterations over the search space
43+
num_iterations: 30
44+
# Number of times to evaluate each iteration (using the mean of all of them)
45+
num_evaluations_per_iteration: 5
46+
# Methods for discovering gateway probabilities
47+
gateway_probabilities: discovery
48+
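# Process model discovery algorithm (Split Miner; NOTE(review): this comment used to say "v3" but the value is sm1 — confirm the intended version)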
49+
mining_algorithm: sm1
50+
# Number of concurrent relations between events to be captured
51+
epsilon:
52+
- 0.05
53+
- 0.4
54+
# Threshold for filtering the incoming and outgoing edges
55+
eta:
56+
- 0.2
57+
- 0.7
58+
# Whether to replace non-trivial OR joins or not
59+
replace_or_joins:
60+
- true
61+
- false
62+
# Whether to prioritize parallelism over loops or not
63+
prioritize_parallelism: true
64+
# Discover data-aware branching rules, i.e., BPMN decision points based on the values of data attributes
65+
discover_branch_rules: true
66+
# Minimum f-score value to consider the discovered data-aware branching rules
67+
f_score:
68+
- 0.3
69+
- 0.9
70+
##################
71+
# Resource model #
72+
##################
73+
resource_model:
74+
# Metric to guide the optimization process (loss function to minimize)
75+
optimization_metric: circadian_emd
76+
# Number of optimization iterations over the search space
77+
num_iterations: 40
78+
# Number of times to evaluate each iteration (using the mean of all of them)
79+
num_evaluations_per_iteration: 5
80+
# Whether to discover prioritization or batching behavior
81+
discover_prioritization_rules: false
82+
discover_batching_rules: false
83+
# Resource profiles configuration
84+
resource_profiles:
85+
# Resource profile discovery type
86+
discovery_type: differentiated
87+
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
88+
granularity: 60
89+
# Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
90+
confidence:
91+
- 0.5
92+
- 0.85
93+
# Minimum support of the intervals in the discovered calendar (of a resource or set of resources)
94+
support:
95+
- 0.05
96+
- 0.5
97+
# Participation of a resource in the process to discover a calendar for them (gathered together otherwise)
98+
participation: 0.4

resources/config/benchmark/benchmark_diff_extr.yml

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
version: 4
1+
version: 5
22
##########
33
# Common #
44
##########
55
common:
66
# Path to the event log in CSV format
7-
train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
88
# Event log to evaluate the discovered BPS model with
9-
test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
10-
# Use observed arrival distributions
11-
use_observed_arrival_distribution: false
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
1210
# Specify the name for each of the columns in the CSV file (XES standard by default)
1311
log_ids:
1412
case: "case_id"
@@ -28,7 +26,7 @@ common:
2826
- arrival_event_distribution
2927
- cycle_time_distribution
3028
# Whether to discover case attributes or not
31-
discover_case_attributes: false
29+
discover_data_attributes: false
3230
#################
3331
# Preprocessing #
3432
#################
@@ -62,9 +60,7 @@ control_flow:
6260
- true
6361
- false
6462
# Whether to prioritize parallelism over loops or not
65-
prioritize_parallelism:
66-
- true
67-
- false
63+
prioritize_parallelism: true
6864
##################
6965
# Resource model #
7066
##################
@@ -83,9 +79,7 @@ resource_model:
8379
# Resource profile discovery type
8480
discovery_type: differentiated
8581
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
86-
granularity:
87-
- 15
88-
- 60
82+
granularity: 60
8983
# Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
9084
confidence:
9185
- 0.5
@@ -100,6 +94,8 @@ resource_model:
10094
# Extraneous delays #
10195
#####################
10296
extraneous_activity_delays:
97+
# Method to compute the extraneous delay (naive or eclipse-aware)
98+
discovery_method: eclipse-aware
10399
# Metric to guide the optimization process (loss function to minimize)
104100
optimization_metric: relative_emd
105101
# Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)

resources/config/benchmark/benchmark_fuzz.yml

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
version: 4
1+
version: 5
22
##########
33
# Common #
44
##########
55
common:
66
# Path to the event log in CSV format
7-
train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
88
# Event log to evaluate the discovered BPS model with
9-
test_log_path: ../../event_logs/AcademicCredentials_W_test.csv.gz
10-
# Use observed arrival distributions
11-
use_observed_arrival_distribution: false
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
1210
# Specify the name for each of the columns in the CSV file (XES standard by default)
1311
log_ids:
1412
case: "case_id"
@@ -28,7 +26,7 @@ common:
2826
- arrival_event_distribution
2927
- cycle_time_distribution
3028
# Whether to discover case attributes or not
31-
discover_case_attributes: false
29+
discover_data_attributes: false
3230
#################
3331
# Preprocessing #
3432
#################
@@ -62,9 +60,7 @@ control_flow:
6260
- true
6361
- false
6462
# Whether to prioritize parallelism over loops or not
65-
prioritize_parallelism:
66-
- true
67-
- false
63+
prioritize_parallelism: true
6864
##################
6965
# Resource model #
7066
##################
@@ -83,17 +79,7 @@ resource_model:
8379
# Resource profile discovery type
8480
discovery_type: differentiated_fuzzy
8581
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
86-
granularity:
87-
- 60
88-
- 120
82+
granularity: 60
8983
fuzzy_angle:
9084
- 0.1
9185
- 0.9
92-
#####################
93-
# Extraneous delays #
94-
#####################
95-
#extraneous_activity_delays:
96-
# Metric to guide the optimization process (loss function to minimize)
97-
# optimization_metric: relative_emd
98-
# Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)
99-
# num_iterations: 20

resources/config/benchmark/benchmark_fuzz_extr.yml

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
version: 4
1+
version: 5
22
##########
33
# Common #
44
##########
55
common:
66
# Path to the event log in CSV format
7-
train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
88
# Event log to evaluate the discovered BPS model with
9-
test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
10-
# Use observed arrival distributions
11-
use_observed_arrival_distribution: false
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
1210
# Specify the name for each of the columns in the CSV file (XES standard by default)
1311
log_ids:
1412
case: "case_id"
@@ -28,7 +26,7 @@ common:
2826
- arrival_event_distribution
2927
- cycle_time_distribution
3028
# Whether to discover case attributes or not
31-
discover_case_attributes: false
29+
discover_data_attributes: false
3230
#################
3331
# Preprocessing #
3432
#################
@@ -62,9 +60,7 @@ control_flow:
6260
- true
6361
- false
6462
# Whether to prioritize parallelism over loops or not
65-
prioritize_parallelism:
66-
- true
67-
- false
63+
prioritize_parallelism: true
6864
##################
6965
# Resource model #
7066
##################
@@ -83,16 +79,16 @@ resource_model:
8379
# Resource profile discovery type
8480
discovery_type: differentiated_fuzzy
8581
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
86-
granularity:
87-
- 60
88-
- 120
82+
granularity: 60
8983
fuzzy_angle:
9084
- 0.1
9185
- 0.9
9286
#####################
9387
# Extraneous delays #
9488
#####################
9589
extraneous_activity_delays:
90+
# Method to compute the extraneous delay (naive or eclipse-aware)
91+
discovery_method: eclipse-aware
9692
# Metric to guide the optimization process (loss function to minimize)
9793
optimization_metric: relative_emd
9894
# Number of optimization iterations over the search space (1 = direct discovery, no optimization stage)

resources/config/benchmark/benchmark_pool.yml

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
version: 4
1+
version: 5
22
##########
33
# Common #
44
##########
55
common:
66
# Path to the event log in CSV format
7-
train_log_path: ../../event_logs/AcademicCredentials_train.csv.gz
7+
train_log_path: ../../event_logs/BPIC_2012_W_train.csv.gz
88
# Event log to evaluate the discovered BPS model with
9-
test_log_path: ../../event_logs/AcademicCredentials_test.csv.gz
10-
# Use observed arrival distributions
11-
use_observed_arrival_distribution: false
9+
test_log_path: ../../event_logs/BPIC_2012_W_test.csv.gz
1210
# Specify the name for each of the columns in the CSV file (XES standard by default)
1311
log_ids:
1412
case: "case_id"
@@ -28,7 +26,7 @@ common:
2826
- arrival_event_distribution
2927
- cycle_time_distribution
3028
# Whether to discover case attributes or not
31-
discover_case_attributes: false
29+
discover_data_attributes: false
3230
#################
3331
# Preprocessing #
3432
#################
@@ -62,9 +60,7 @@ control_flow:
6260
- true
6361
- false
6462
# Whether to prioritize parallelism over loops or not
65-
prioritize_parallelism:
66-
- true
67-
- false
63+
prioritize_parallelism: true
6864
##################
6965
# Resource model #
7066
##################
@@ -83,9 +79,7 @@ resource_model:
8379
# Resource profile discovery type
8480
discovery_type: pool
8581
# Time granularity (in minutes) for the resource calendar (the higher the density of events in the log, the smaller the granularity can be)
86-
granularity:
87-
- 15
88-
- 60
82+
granularity: 60
8983
# Minimum confidence of the intervals in the discovered calendar (of a resource or set of resources)
9084
confidence:
9185
- 0.5
@@ -95,4 +89,4 @@ resource_model:
9589
- 0.05
9690
- 0.5
9791
# Participation of a resource in the process to discover a calendar for them (gathered together otherwise)
98-
participation: 0.4
92+
participation: 0.4

0 commit comments

Comments
 (0)