-
Notifications
You must be signed in to change notification settings - Fork 2.7k
[CI] Chaotic devnet test #4069
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CI] Chaotic devnet test #4069
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| #!/bin/bash | ||
|
|
||
| #shellcheck source=SCRIPTDIR/utils.sh | ||
| . ./.ci/utils.sh | ||
|
|
||
| # Network parameters | ||
| total_validators=7 | ||
| majority=$(( (total_validators - 1) / 3 + 1 )) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this be over 50% to be a majority? |
||
# Network the devnet runs on. The id is passed to `snarkos`, the name to
# `check_heights`.
network_id=0
network_name="mainnet"

# Stopping conditions
# Height every node must reach before the restart phase is triggered.
restart_height=10
# Height every node must reach afterwards for the test to pass.
final_height=20
# Overall time budget in seconds (10 minutes), shared by both wait phases.
max_wait=600

# Define a trap handler that cleans up all processes on exit.
trap stop_nodes EXIT

# Define a trap handler that prints a message when an error occurs
trap 'echo "⛔️ Error in $BASH_SOURCE at line $LINENO: \"$BASH_COMMAND\" failed (exit $?)"' ERR

# Start all validator nodes in the background, each from a clean ledger.
for ((validator_index = 0; validator_index < total_validators; validator_index++)); do
  snarkos clean --dev "$validator_index" --network="$network_id"

  snarkos start --nodisplay --network "$network_id" --dev "$validator_index" \
    --dev-num-validators "$total_validators" --validator &
  # Remember the PID so the EXIT trap (and the restart phase) can find the node.
  PIDS[validator_index]=$!
  echo "Started validator $validator_index with PID ${PIDS[$validator_index]}"
  # Add 1-second delay between starting nodes to avoid hitting rate limits
  sleep 1
done

wait_for_nodes "$total_validators" 0

# Phase 1: wait (within the time budget) until every node reached
# `restart_height`, then wipe and restart `majority` randomly chosen validators.
total_wait=0
restarted=0
while (( total_wait < max_wait )); do
  if check_heights 0 "$total_validators" "$restart_height" "$network_name"; then
    echo "All nodes reached restart height."

    # Pick the validators to take down. Abort instead of silently continuing
    # with an empty selection if the helper fails.
    if ! selected=$(generate_random_indices "$majority" $(( ${#PIDS[@]} - 1 ))); then
      echo "❌ Test failed! Could not select validators to restart."
      exit 1
    fi
    # The helper prints plain integers, so word splitting is intended here.
    # shellcheck disable=SC2206
    targets=( $selected )
    stop_some_nodes "${targets[@]}"

    for target_index in "${targets[@]}"; do
      # Remove the original ledger
      snarkos clean "--network=$network_id" "--dev=$target_index"
    done

    # wait for a non-trivial amount of time
    sleep 30

    for target_index in "${targets[@]}"; do
      # Restart
      snarkos start --nodisplay "--network=$network_id" "--dev=$target_index" \
        "--dev-num-validators=$total_validators" --validator &
      PIDS[target_index]=$!
      echo "Restarted a fresh validator $target_index with PID ${PIDS[$target_index]}"
      # Add 1-second delay between starting nodes to avoid hitting rate limits
      sleep 1
    done

    # Account for the 30-second pause plus one second per restarted node.
    total_wait=$((total_wait + 30 + majority))

    restarted=1
    break
  fi

  sleep 3
  total_wait=$((total_wait + 3))
done

# Without this guard a devnet that never reaches `restart_height` would be
# reported only by the generic failure at the end; say what actually happened.
if (( ! restarted )); then
  echo "❌ Test failed! Nodes did not reach restart height $restart_height within $max_wait seconds."
  exit 1
fi

# Phase 2: the restarted nodes must catch up and all nodes reach `final_height`.
while (( total_wait < max_wait )); do
  if check_heights 0 "$total_validators" "$final_height" "$network_name"; then
    echo "SUCCESS!"
    exit 0
  fi

  # Continue waiting
  sleep 3
  total_wait=$((total_wait + 3))
  echo "Waited $total_wait seconds so far..."
done

# The main loop has expired by now
echo "❌ Test failed!"
exit 1
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| #!/bin/bash | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The script itself seems to implement its idea well. If you want, you can apply some of the suggestions I added to |
||
#shellcheck source=SCRIPTDIR/utils.sh
. ./.ci/utils.sh

# Network parameters
total_validators=7
# Number of validators restarted per iteration: (7 - 1) / 3 = 2.
minority=$(( (total_validators - 1) / 3 ))
network_id=0
network_name="mainnet"

# Stopping conditions
# Number of stop/wipe/restart rounds; round `iter` triggers at height iter * 10.
num_iterations=3
# Height every node must reach after the last round for the test to pass.
final_height=40
# Overall time budget in seconds (10 minutes), shared by all wait phases.
max_wait=600

# Define a trap handler that cleans up all processes on exit.
trap stop_nodes EXIT

# Define a trap handler that prints a message when an error occurs
trap 'echo "⛔️ Error in $BASH_SOURCE at line $LINENO: \"$BASH_COMMAND\" failed (exit $?)"' ERR

# Start all validator nodes in the background, each from a clean ledger.
for ((validator_index = 0; validator_index < total_validators; validator_index++)); do
  snarkos clean --dev "$validator_index" --network="$network_id"

  snarkos start --nodisplay --network "$network_id" --dev "$validator_index" \
    --dev-num-validators "$total_validators" --validator &
  # Remember the PID so the EXIT trap (and the restart rounds) can find the node.
  PIDS[validator_index]=$!
  echo "Started validator $validator_index with PID ${PIDS[$validator_index]}"
  # Add 1-second delay between starting nodes to avoid hitting rate limits
  sleep 1
done

wait_for_nodes "$total_validators" 0

total_wait=0
for ((iter = 1; iter <= num_iterations; iter++)); do
  restart_height=$(( iter * 10 ))

  # Wait (within the time budget) until every node reached this round's
  # height, then wipe and restart `minority` randomly chosen validators.
  reached=0
  while (( total_wait < max_wait )); do
    if check_heights 0 "$total_validators" "$restart_height" "$network_name"; then
      echo "All nodes reached restart height."

      # Pick the validators to take down. Abort instead of silently continuing
      # with an empty selection if the helper fails.
      if ! selected=$(generate_random_indices "$minority" $(( ${#PIDS[@]} - 1 ))); then
        echo "❌ Test failed! Could not select validators to restart."
        exit 1
      fi
      # The helper prints plain integers, so word splitting is intended here.
      # shellcheck disable=SC2206
      targets=( $selected )
      stop_some_nodes "${targets[@]}"

      for target_index in "${targets[@]}"; do
        # Remove the original ledger
        snarkos clean "--network=$network_id" "--dev=$target_index"
        # Wait until the cleanup concludes
        sleep 1
        # Restart
        snarkos start --nodisplay "--network=$network_id" "--dev=$target_index" \
          "--dev-num-validators=$total_validators" --validator &
        PIDS[target_index]=$!
        echo "Restarted a fresh validator $target_index with PID ${PIDS[$target_index]}"
        # Add 1-second delay between starting nodes to avoid hitting rate limits
        sleep 1
        # Two one-second sleeps per restarted node.
        total_wait=$((total_wait + 2))
      done

      reached=1
      break
    fi

    sleep 3
    total_wait=$((total_wait + 3))
  done

  # Stop right away when the budget ran out before this round's height was
  # reached, rather than looping forever.
  if (( ! reached )); then
    echo "❌ Test failed! Nodes did not reach restart height $restart_height within $max_wait seconds."
    exit 1
  fi
done

# Final phase: all nodes, including the restarted ones, must reach `final_height`.
while (( total_wait < max_wait )); do
  if check_heights 0 "$total_validators" "$final_height" "$network_name"; then
    echo "SUCCESS!"
    exit 0
  fi

  # Continue waiting
  sleep 3
  total_wait=$((total_wait + 3))
  echo "Waited $total_wait seconds so far..."
done

# The main loop has expired by now
echo "❌ Test failed!"
exit 1
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -143,7 +143,7 @@ function get_network_name() { | |
| esac | ||
| } | ||
|
|
||
| # Stops all running processe in the given list. | ||
| # Stops all running processes in the PIDS list. | ||
| function stop_nodes() { | ||
| echo "🚨 Cleaning up ${#PIDS[@]} process(es)…" | ||
| for pid in "${PIDS[@]}"; do | ||
|
|
@@ -156,6 +156,49 @@ function stop_nodes() { | |
| wait | ||
| } | ||
|
|
||
| # Generates the given number of random indices up to max_index. | ||
# Generates the given number of unique random indices in the range 0..max_index.
#
# Arguments:
#   $1 - count: how many distinct indices to produce (non-negative integer)
#   $2 - max_index: largest index that may be returned (integer; -1 denotes
#        an empty range, e.g. when derived from an empty array)
# Outputs:
#   The selected indices on stdout, one per line, in random order.
# Returns:
#   0 on success; 1 (with a diagnostic on stderr) if an argument is not a
#   plain decimal integer or if more indices are requested than exist.
function generate_random_indices() {
  local count=$1
  local max_index=$2
  # Plain decimal integers only. Leading zeros are rejected because the
  # arithmetic below would interpret them as octal.
  local unsigned_re='^(0|[1-9][0-9]*)$'
  local signed_re='^-?(0|[1-9][0-9]*)$'

  # Validate before any arithmetic: inside (( )) a non-numeric string would be
  # evaluated as a variable name instead of being rejected.
  if [[ ! "$count" =~ $unsigned_re || ! "$max_index" =~ $signed_re ]]; then
    echo "Error: count must be a non-negative integer and max_index an integer." >&2
    return 1
  fi

  # Check if count is greater than max_index + 1 (impossible request)
  if (( count > max_index + 1 )); then
    echo "Error: Cannot request more unique indices than exist." >&2
    return 1
  fi

  # Nothing to pick. Also avoids handing shuf the invalid range "0--1" when
  # the index range is empty.
  if (( count == 0 )); then
    return 0
  fi

  # shuf -i generates a range (0 to max), -n picks N items
  shuf -i 0-"$max_index" -n "$count"
}
|
|
||
| # Stops select running processes from the PIDS list. | ||
# Stops select running processes from the PIDS list.
#
# Globals:
#   PIDS (read) - indexed array mapping node index -> PID
#   STOP_NODES_SIGNAL (read, optional) - signal name or number to send;
#     defaults to KILL, i.e. the nodes are terminated forcibly.
# Arguments:
#   $@ - indices into PIDS of the nodes to stop
# Outputs:
#   Progress messages on stdout; rejected indices and failed kills on stderr.
# Returns:
#   Always 0; stopping is best-effort.
#
# NOTE(review): a reviewer asked whether forcibly killing the nodes is
# intended. The signal is therefore overridable (e.g. STOP_NODES_SIGNAL=TERM
# for a graceful shutdown). Be aware that the function waits for every
# signalled process to exit, so a process ignoring the chosen signal blocks it.
function stop_some_nodes() {
  local indices=("$@")
  local killed_pids=()
  local signal="${STOP_NODES_SIGNAL:-KILL}"
  # Keep the loop variables local so the caller's `i`/`pid` are not clobbered.
  local i pid

  echo "🚨 Stopping ${#indices[@]} selected node(s)..."

  for i in "${indices[@]}"; do
    # An array subscript is evaluated arithmetically, so a non-numeric index
    # would silently resolve to PIDS[0]; reject it instead.
    if [[ ! "$i" =~ ^[0-9]+$ ]]; then
      echo "Skipping invalid node index '$i'." >&2
      continue
    fi

    # Get the PID from the global PIDS array using the index
    # (10# forces base 10 so that an index like "08" is not read as octal).
    pid="${PIDS[$((10#$i))]}"

    # Check if PID exists (is not empty) and is currently running
    if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
      echo "Killing PIDS[$i] -> $pid"
      if kill -s "$signal" "$pid" 2>/dev/null; then
        # Add to list of PIDs to wait for specifically
        killed_pids+=("$pid")
      else
        # Do not wait for a process that was never signalled (e.g. invalid
        # signal name); that wait could block indefinitely.
        echo "Failed to send signal $signal to PID $pid." >&2
      fi
    else
      echo "Skipping PIDS[$i] (PID: $pid) - Already dead or invalid."
    fi
  done

  # Wait only for the processes we just killed to ensure they are gone
  if (( ${#killed_pids[@]} > 0 )); then
    wait "${killed_pids[@]}" 2>/dev/null || true
  fi
}
|
|
||
| # Succeeds if all nodes are available. | ||
| function check_nodes() { | ||
| local total_validators=$1 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment as on the minority script.