diff --git a/.github/workflows/miniziti.yml b/.github/workflows/miniziti.yml
index a7864fa5..919375ba 100644
--- a/.github/workflows/miniziti.yml
+++ b/.github/workflows/miniziti.yml
@@ -4,6 +4,7 @@ on:
   pull_request:
     paths:
       - .github/workflows/miniziti.yml
+      - run-miniziti.bash
       - charts/ziti-controller/**
       - charts/ziti-router/**
       - charts/httpbin/**
@@ -48,11 +49,17 @@ jobs:
       matrix: ${{ fromJson(needs.compute-k8s-matrix.outputs.matrix) }}
     env:
       ZITI_NAMESPACE: miniziti
-      ZITI_CLI_VERSION: 1.7.2
     steps:
       - name: Checkout workspace
         uses: actions/checkout@v4

+      - name: Resolve latest Ziti CLI version
+        shell: bash
+        run: |
+          # jq -e exits non-zero when tag_name is null/missing, so a malformed API reply fails here rather than at a ".../download/null/..." fetch
+          ZITI_CLI_TAG="$(curl -sSf https://api.github.com/repos/openziti/ziti/releases/latest | jq -er '.tag_name')"
+          printf 'ZITI_CLI_TAG=%s\nZITI_CLI_VERSION=%s\n' "${ZITI_CLI_TAG}" "${ZITI_CLI_TAG#v}" >> "$GITHUB_ENV"
+
       - name: Resolve latest patch for Kubernetes minor
         id: resolve_k8s
         shell: bash
@@ -64,29 +71,27 @@ jobs:
         with:
           start-args: --profile ${{ env.ZITI_NAMESPACE }} --kubernetes-version=${{ steps.resolve_k8s.outputs.kubernetes_version }}

-      - name: install ziti cli
+      - name: Install ziti CLI
         uses: supplypike/setup-bin@v5
         with:
-          uri: https://github.com/openziti/ziti/releases/download/v${{ env.ZITI_CLI_VERSION }}/ziti-linux-amd64-${{ env.ZITI_CLI_VERSION }}.tar.gz
+          uri: https://github.com/openziti/ziti/releases/download/${{ env.ZITI_CLI_TAG }}/ziti-linux-amd64-${{ env.ZITI_CLI_VERSION }}.tar.gz
           name: ziti
           version: ${{ env.ZITI_CLI_VERSION }}

-      # this is the kubernetes quickstart script from
-      # https://openziti.io/docs/learn/quickstarts/network/local-kubernetes
-      - name: install miniziti
+      - name: Install miniziti
         uses: supplypike/setup-bin@v5
         with:
           # uri: https://get.openziti.io/miniziti.bash
-          uri: https://raw.githubusercontent.com/openziti/ziti/retire-ingress-nginx/quickstart/kubernetes/miniziti.bash
-          # uri: https://raw.githubusercontent.com/openziti/ziti//quickstart/kubernetes/miniziti.bash
+          # uri:
https://raw.githubusercontent.com/openziti/ziti/main/quickstart/kubernetes/miniziti.bash + uri: https://raw.githubusercontent.com/openziti/ziti/codify-jwks-orchestration/quickstart/kubernetes/miniziti.bash name: miniziti - version: quickstartrelease + version: codify-jwks-orchestration - name: Select test mode id: mode shell: bash run: | - if [[ "${{ vars.SKIP_MINIKUBE_LATEST_CHARTS }}" == "true" ]]; then + if [[ "${{ vars.SKIP_MINIZITI_BASELINE }}" == "true" ]]; then echo "run_baseline=false" >> "$GITHUB_OUTPUT" echo "Running upgrade-only path" else @@ -94,509 +99,24 @@ jobs: echo "Running baseline+upgrade path" fi - - name: Install Loki for log aggregation - run: | - helm repo add grafana https://grafana.github.io/helm-charts - helm repo update - helm upgrade --install loki grafana/loki-stack \ - --namespace loki-stack \ - --create-namespace \ - --set loki.auth_enabled=false \ - --set promtail.enabled=true \ - --set grafana.enabled=false \ - --set test.enabled=false \ - --wait --timeout 120s - - - name: Run miniziti with latest release charts + - name: Run baseline + upgrade pipeline if: steps.mode.outputs.run_baseline == 'true' - run: miniziti start --no-hosts --verbose env: MINIZITI_TIMEOUT_SECS: 300 - - - name: Find miniziti ingress zone (initial) - if: steps.mode.outputs.run_baseline == 'true' - id: ingress_zone_initial - run: | - echo "ingress_zone=$(miniziti kubectl get configmap miniziti-config -n ${ZITI_NAMESPACE} -o jsonpath='{.data.ingress-zone}')" >> $GITHUB_OUTPUT - - - name: Find the ziti admin password (initial) - if: steps.mode.outputs.run_baseline == 'true' - id: get_ziti_pwd_initial - run: | - miniziti kubectl get secrets "ziti-controller-admin-secret" \ - --output go-template='{{index .data "admin-password" | base64decode }}' \ - | xargs -Iadmin_password echo "ZITI_PWD=admin_password" >> $GITHUB_OUTPUT - - - name: Enroll client identity - if: steps.mode.outputs.run_baseline == 'true' - run: > - ziti edge enroll - --jwt 
~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.jwt - --out ~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.json - - - name: Run client proxy - if: steps.mode.outputs.run_baseline == 'true' + MINIZITI_VERSION: ${{ vars.MINIZITI_VERSION }} run: > - nohup ziti tunnel proxy "httpbin-service:4321" - --identity ~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.json - --verbose /tmp/miniziti-client.log & - - - name: Wait for proxy to serve the httpbin service - if: steps.mode.outputs.run_baseline == 'true' - id: wait_for_proxy_initial - continue-on-error: true - uses: iFaxity/wait-on-action@v1 - with: - resource: http://127.0.0.1:4321/get - delay: 1000 - interval: 1000 - timeout: 20000 - - - name: Send a POST request to the httpbin service - if: steps.mode.outputs.run_baseline == 'true' - id: test_httpbin_initial - continue-on-error: true - shell: bash - run: | - set -euo pipefail - curl -sSf -XPOST -F ziti=awesome http://127.0.0.1:4321/post > /tmp/httpbin-response.json - AWESOME=$(jq -r '.form.ziti[0]' /tmp/httpbin-response.json) - if [[ "$AWESOME" == "awesome" ]]; then - echo "✓ Ziti is awesome!" - else - echo "✗ Got '$AWESOME' instead of 'awesome'" >&2 - exit 1 - fi - - - name: Start Loki port-forward for log queries - if: steps.mode.outputs.run_baseline == 'true' && always() - shell: bash - run: | - # Start port-forward in background - miniziti kubectl port-forward -n loki-stack svc/loki 3100:3100 & - echo "LOKI_PID=$!" 
>> $GITHUB_ENV - sleep 5 - - - name: Query Loki for post-install hook logs - if: steps.mode.outputs.run_baseline == 'true' && always() - shell: bash - run: | - echo "==========================================" - echo "HTTPBin Post-Install Hook (from Loki)" - echo "==========================================" - - # Query Loki for hook logs - RESPONSE=$(curl -s "http://localhost:3100/loki/api/v1/query_range" \ - --data-urlencode "query={namespace=\"${ZITI_NAMESPACE}\",pod=~\".*post-install.*\"}" \ - --data-urlencode "start=$(date -u -d '10 minutes ago' +%s)000000000" \ - --data-urlencode "end=$(date -u +%s)000000000") - - echo "DEBUG: Loki response:" - echo "$RESPONSE" | jq '.' || echo "Failed to parse JSON: $RESPONSE" - - # Extract log lines - echo "" - echo "Log lines:" - echo "$RESPONSE" | jq -r '.data.result[].values[][1]' 2>/dev/null || echo "No logs found or failed to parse" - - echo "" - echo "==========================================" - echo "HTTPBin Application Logs (Initial Deploy)" - echo "==========================================" - - # Wait for httpbin to start hosting the Ziti service - echo "Waiting 10 seconds for httpbin to initialize..." 
- sleep 10 - - # Get httpbin pod logs - HTTPBIN_POD=$(miniziti kubectl get pods -n ${ZITI_NAMESPACE} -l app.kubernetes.io/name=httpbin -o jsonpath='{.items[0].metadata.name}') - if [[ -n "$HTTPBIN_POD" ]]; then - echo "HTTPBin pod: $HTTPBIN_POD" - miniziti kubectl logs -n ${ZITI_NAMESPACE} "$HTTPBIN_POD" --tail=50 || echo "Failed to get httpbin logs" - else - echo "WARNING: No httpbin pod found" - fi - - - name: Install the zrok chart from the latest release - if: steps.mode.outputs.run_baseline == 'true' - shell: bash - env: - ZITI_MGMT_API_HOST: miniziti-controller.${{ steps.ingress_zone_initial.outputs.ingress_zone }} - ZITI_PWD: ${{ steps.get_ziti_pwd_initial.outputs.ZITI_PWD }} - ZROK_DNS_ZONE: ${{ steps.ingress_zone_initial.outputs.ingress_zone }} - run: | - helm upgrade \ - --install \ - --namespace zrok --create-namespace \ - --values ./charts/zrok/values-ingress-traefik.yaml \ - --set "ziti.advertisedHost=${ZITI_MGMT_API_HOST}" \ - --set "ziti.password=${ZITI_PWD}" \ - --set "dnsZone=${ZROK_DNS_ZONE}" \ - --set "controller.ingress.hosts[0]=zrok.${ZROK_DNS_ZONE}" \ - --set "test.enabled=false" \ - zrok openziti/zrok - - - name: Capture zrok controller and frontend logs (before upgrade) - if: steps.mode.outputs.run_baseline == 'true' - shell: bash - run: | - echo "==========================================" - echo "Zrok Controller Logs (Before Upgrade)" - echo "==========================================" - - # Wait for zrok pods to initialize - echo "Waiting 10 seconds for zrok to initialize..." 
- sleep 10 - - # Get zrok controller logs - ZROK_CTRL_POD=$(miniziti kubectl get pods -n zrok -l app.kubernetes.io/name=zrok-controller -o jsonpath='{.items[0].metadata.name}') - if [[ -n "$ZROK_CTRL_POD" ]]; then - echo "Zrok controller pod: $ZROK_CTRL_POD" - miniziti kubectl logs -n zrok "$ZROK_CTRL_POD" --tail=50 || echo "Failed to get zrok controller logs" - else - echo "WARNING: No zrok controller pod found" - fi - - echo "" - echo "==========================================" - echo "Zrok Frontend Logs (Before Upgrade)" - echo "==========================================" - - # Get zrok frontend logs - ZROK_FRONTEND_POD=$(miniziti kubectl get pods -n zrok -l app.kubernetes.io/name=zrok-frontend -o jsonpath='{.items[0].metadata.name}') - if [[ -n "$ZROK_FRONTEND_POD" ]]; then - echo "Zrok frontend pod: $ZROK_FRONTEND_POD" - miniziti kubectl logs -n zrok "$ZROK_FRONTEND_POD" --tail=50 || echo "Failed to get zrok frontend logs" - else - echo "WARNING: No zrok frontend pod found" - fi + bash run-miniziti.bash + testvalues baseline proxy-test zrok upgrade verify proxy-test zrok-test - - name: Create test values directory - run: | - mkdir -p testvalues - cat > testvalues/httpbin.yaml <> $GITHUB_OUTPUT - - - name: Find the ziti admin password (current) - id: get_ziti_pwd - run: | - miniziti kubectl get secrets "ziti-controller-admin-secret" \ - --output go-template='{{index .data "admin-password" | base64decode }}' \ - | xargs -Iadmin_password echo "ZITI_PWD=admin_password" >> $GITHUB_OUTPUT - - - name: Query Loki for post-upgrade hook logs - if: steps.mode.outputs.run_baseline == 'true' - shell: bash - run: | - echo "==========================================" - echo "HTTPBin Post-Upgrade Hook (from Loki)" - echo "==========================================" - - # Reuse existing port-forward (already running from previous step) - # Query Loki for hook logs - RESPONSE=$(curl -s "http://localhost:3100/loki/api/v1/query_range" \ - --data-urlencode 
"query={namespace=\"${ZITI_NAMESPACE}\",pod=~\".*post-upgrade.*\"}" \ - --data-urlencode "start=$(date -u -d '10 minutes ago' +%s)000000000" \ - --data-urlencode "end=$(date -u +%s)000000000") - - echo "DEBUG: Loki response:" - echo "$RESPONSE" | jq '.' || echo "Failed to parse JSON: $RESPONSE" - - # Extract log lines - echo "" - echo "Log lines:" - echo "$RESPONSE" | jq -r '.data.result[].values[][1]' 2>/dev/null || echo "No logs found or failed to parse" - - echo "" - echo "==========================================" - echo "HTTPBin Application Logs (After Upgrade)" - echo "==========================================" - - # Wait for httpbin to restart and initialize after upgrade - echo "Waiting 10 seconds for httpbin to initialize after upgrade..." - sleep 10 - - # Get httpbin pod logs - HTTPBIN_POD=$(miniziti kubectl get pods -n ${ZITI_NAMESPACE} -l app.kubernetes.io/name=httpbin -o jsonpath='{.items[0].metadata.name}') - if [[ -n "$HTTPBIN_POD" ]]; then - echo "HTTPBin pod: $HTTPBIN_POD" - miniziti kubectl logs -n ${ZITI_NAMESPACE} "$HTTPBIN_POD" --tail=50 || echo "Failed to get httpbin logs" - else - echo "WARNING: No httpbin pod found" - fi - - - name: Stop Loki port-forward - if: always() - shell: bash - run: | - if [[ -n "${LOKI_PID:-}" ]]; then - kill $LOKI_PID || true - echo "Stopped Loki port-forward (PID: $LOKI_PID)" - fi - - - name: Verify the Console is Available - shell: bash - run: | - set -euo pipefail - curl -skSfw '%{http_code}\t%{url}\n' -o/dev/null \ - https://miniziti-controller.${{ steps.ingress_zone.outputs.ingress_zone }}/zac/ - - - name: Enroll client identity (current charts) - if: steps.mode.outputs.run_baseline != 'true' + MINIZITI_VERSION: ${{ vars.MINIZITI_VERSION }} run: > - ziti edge enroll - --jwt ~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.jwt - --out ~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.json + bash run-miniziti.bash + testvalues upgrade 
verify proxy-test zrok-test - - name: Run client proxy (current charts) - if: steps.mode.outputs.run_baseline != 'true' - run: > - nohup ziti tunnel proxy "httpbin-service:4321" - --identity ~/.local/state/miniziti/profiles/${ZITI_NAMESPACE}/identities/${ZITI_NAMESPACE}-client.json - --verbose /tmp/miniziti-client.log & - - - name: Wait for proxy to serve the httpbin service (after upgrade) - id: wait_for_proxy_upgrade - continue-on-error: true - uses: iFaxity/wait-on-action@v1 - with: - resource: http://127.0.0.1:4321/get - delay: 1000 - interval: 1000 - timeout: 20000 - - - name: Send a POST request to the httpbin service (after upgrade) - id: test_httpbin_upgrade - continue-on-error: true - shell: bash - run: | - set -euo pipefail - curl -sSf -XPOST -F ziti=awesome http://127.0.0.1:4321/post > /tmp/httpbin-response-upgrade.json - AWESOME=$(jq -r '.form.ziti[0]' /tmp/httpbin-response-upgrade.json) - if [[ "$AWESOME" == "awesome" ]]; then - echo "✓ Ziti is awesome (after upgrade)!" - else - echo "✗ Got '$AWESOME' instead of 'awesome'" >&2 - exit 1 - fi - - - name: Upgrade the zrok chart from the current branch and run the test job - shell: bash - env: - ZITI_MGMT_API_HOST: miniziti-controller.${{ steps.ingress_zone.outputs.ingress_zone }} - ZITI_PWD: ${{ steps.get_ziti_pwd.outputs.ZITI_PWD }} - ZROK_DNS_ZONE: ${{ steps.ingress_zone.outputs.ingress_zone }} - run: | - helm upgrade \ - --install \ - --namespace zrok --create-namespace \ - --values ./charts/zrok/values-ingress-traefik.yaml \ - --set "ziti.advertisedHost=${ZITI_MGMT_API_HOST}" \ - --set "ziti.password=${ZITI_PWD}" \ - --set "dnsZone=${ZROK_DNS_ZONE}" \ - --set "controller.ingress.hosts[0]=zrok.${ZROK_DNS_ZONE}" \ - --set "test.enabled=true" \ - zrok ./charts/zrok - - - name: Check zrok test job result - shell: bash - run: | - miniziti kubectl -n zrok wait --for=condition=complete --timeout=180s job/zrok-test-job - - - name: Print debug info + - name: Debug if: always() - shell: bash - run: | - set +e - 
set -x - - echo "==========================================" - echo "Kubernetes Resources Overview" - echo "==========================================" - miniziti kubectl get pods -A - miniziti kubectl get services -A - miniziti kubectl get ingresses -A - miniziti kubectl get secrets -n ${ZITI_NAMESPACE} - - echo "" - echo "==========================================" - echo "HTTPBin Pod Details" - echo "==========================================" - miniziti kubectl get pods -n ${ZITI_NAMESPACE} -l app.kubernetes.io/name=httpbin -o wide - miniziti kubectl describe pod -n ${ZITI_NAMESPACE} -l app.kubernetes.io/name=httpbin - - echo "" - echo "==========================================" - echo "HTTPBin Identity Secret Analysis" - echo "==========================================" - - # Get the base64-encoded identity JSON from the secret - IDENTITY_B64=$(miniziti kubectl get secret -n ${ZITI_NAMESPACE} miniziti-httpbin-identity -o jsonpath='{.data.ziti_identity_json}') - - echo "Secret data keys:" - miniziti kubectl get secret -n ${ZITI_NAMESPACE} miniziti-httpbin-identity -o jsonpath='{.data}' | jq -r 'keys' - - echo "" - echo "Base64-encoded identity length: ${#IDENTITY_B64} characters" - - # Check if secret data is empty - if [[ -z "$IDENTITY_B64" ]]; then - echo "✗ WARNING: Secret data is empty! Identity was not created or enrolled." - echo " This explains why httpbin cannot bind to the service." - else - # Decode and validate JSON structure - echo "" - echo "Decoding and validating identity JSON..." - IDENTITY_JSON=$(echo "$IDENTITY_B64" | base64 -d) - - echo "Decoded JSON length: ${#IDENTITY_JSON} characters" - - # Validate it's valid JSON (non-empty) - if [[ -z "$IDENTITY_JSON" ]]; then - echo "✗ WARNING: Decoded identity is empty!" 
- elif echo "$IDENTITY_JSON" | jq empty 2>/dev/null; then - echo "✓ Valid JSON structure" - - # Check for Ziti identity structure - echo "" - echo "Ziti identity structure validation:" - echo " Top-level keys: $(echo "$IDENTITY_JSON" | jq -r 'keys | join(", ")')" - - # Check for required Ziti identity fields - HAS_ID=$(echo "$IDENTITY_JSON" | jq 'has("id")') - HAS_ZTAPI=$(echo "$IDENTITY_JSON" | jq 'has("ztAPI")') - echo " Has 'id' key: $HAS_ID" - echo " Has 'ztAPI' key: $HAS_ZTAPI" - - if [[ "$HAS_ID" == "true" ]]; then - echo "" - echo " 'id' object keys: $(echo "$IDENTITY_JSON" | jq -r '.id | keys | join(", ")')" - - # Check for nested certificate components - HAS_CERT=$(echo "$IDENTITY_JSON" | jq 'has("id") and (.id | has("cert"))') - HAS_KEY=$(echo "$IDENTITY_JSON" | jq 'has("id") and (.id | has("key"))') - HAS_CA=$(echo "$IDENTITY_JSON" | jq 'has("id") and (.id | has("ca"))') - - echo " Has 'id.cert': $HAS_CERT" - echo " Has 'id.key': $HAS_KEY" - echo " Has 'id.ca': $HAS_CA" - - # Show cert/key formats (first few chars to confirm PEM format) - if [[ "$HAS_CERT" == "true" ]]; then - CERT_START=$(echo "$IDENTITY_JSON" | jq -r '.id.cert' | head -c 30) - echo " Certificate starts with: $CERT_START" - fi - - if [[ "$HAS_KEY" == "true" ]]; then - KEY_START=$(echo "$IDENTITY_JSON" | jq -r '.id.key' | head -c 30) - echo " Key starts with: $KEY_START" - fi - else - echo " ✗ WARNING: Missing 'id' key - not a valid Ziti identity!" - fi - else - echo "✗ WARNING: Invalid JSON - parsing failed!" 
- echo "First 500 chars: $IDENTITY_JSON" | head -c 500 - fi - fi - - echo "" - echo "==========================================" - echo "HTTPBin Pod Environment Variables" - echo "==========================================" - HTTPBIN_POD=$(miniziti kubectl get pods -n ${ZITI_NAMESPACE} -l app.kubernetes.io/name=httpbin -o jsonpath='{.items[0].metadata.name}') - if [[ -n "$HTTPBIN_POD" ]]; then - # Use kubectl to get env vars from pod spec (distroless image doesn't have grep) - echo "Environment variables from pod spec:" - miniziti kubectl get pod -n ${ZITI_NAMESPACE} "$HTTPBIN_POD" -o jsonpath='{range .spec.containers[0].env[*]}{.name}{"="}{.value}{"\n"}{end}' - fi - - echo "" - echo "==========================================" - echo "NOTE: Hook logs captured in earlier steps" - echo " - Post-install hook: after initial deployment" - echo " - Post-upgrade hook: after branch upgrade" - echo " Hooks are deleted after completion, so they" - echo " must be captured immediately after each deploy." 
- echo "==========================================" - - echo "" - echo "==========================================" - echo "Ziti Controller Logs (last 100 lines)" - echo "==========================================" - miniziti kubectl logs --selector app.kubernetes.io/component=ziti-controller --tail=100 - - echo "" - echo "==========================================" - echo "Ziti Router Logs (last 100 lines)" - echo "==========================================" - miniziti kubectl logs --selector app.kubernetes.io/component=ziti-router --tail=100 - - echo "" - echo "==========================================" - echo "HTTPBin Pod Logs (FULL - Time Separated)" - echo "==========================================" - sleep 2 # Give logs time to settle - miniziti kubectl logs --selector app.kubernetes.io/name=httpbin --tail=-1 - - echo "" - echo "==========================================" - echo "Miniziti Client Proxy Logs" - echo "==========================================" - cat /tmp/miniziti-client.log - - echo "" - echo "==========================================" - echo "JWT Files" - echo "==========================================" - cat ~/.local/state/miniziti/profiles/miniziti/identities/*.jwt || true - - echo "" - echo "==========================================" - echo "Ziti Network Configuration" - echo "==========================================" - # Login to Ziti and check service policies and terminators - miniziti login - - echo "" - echo "--- Service Policy Advisor ---" - ziti edge policy-advisor services --quiet httpbin-service - - echo "" - echo "--- Terminators ---" - ziti edge list terminators - - echo "" - echo "--- Identities ---" - ziti edge list identities - - echo "" - echo "--- Service Policies ---" - ziti edge list service-policies - - echo "" - echo "--- Services ---" - ziti edge list services - - echo "" - echo "==========================================" - echo "Zrok Logs" - echo "==========================================" - miniziti kubectl 
logs --selector app.kubernetes.io/name=zrok-controller -n zrok -c zrok-bootstrap --tail=-1 || true - miniziti kubectl logs --selector app.kubernetes.io/name=zrok-controller -n zrok -c zrok --tail=-1 || true - miniziti kubectl logs --selector app.kubernetes.io/name=zrok-frontend -n zrok -c zrok-bootstrap-frontend --tail=-1 || true - miniziti kubectl logs --selector app.kubernetes.io/name=zrok-frontend -n zrok -c zrok-frontend --tail=-1 || true - miniziti kubectl -n zrok logs job/zrok-test-job || true - - exit 0 + run: bash run-miniziti.bash debug diff --git a/.gitignore b/.gitignore index 755028fe..77b49e43 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ __snapshot__ # top-level directory with local test data /valuestest/ +/testvalues/ diff --git a/charts/ziti-controller/Chart.yaml b/charts/ziti-controller/Chart.yaml index 5dd84d89..0ee39009 100644 --- a/charts/ziti-controller/Chart.yaml +++ b/charts/ziti-controller/Chart.yaml @@ -3,4 +3,4 @@ appVersion: 1.7.2 description: Host an OpenZiti controller in Kubernetes name: ziti-controller type: application -version: 3.1.0 +version: 3.1.1 diff --git a/charts/ziti-controller/README.md b/charts/ziti-controller/README.md index a4bb4a0f..731d124b 100644 --- a/charts/ziti-controller/README.md +++ b/charts/ziti-controller/README.md @@ -2,7 +2,7 @@ # ziti-controller -![Version: 3.1.0](https://img.shields.io/badge/Version-3.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.7.2](https://img.shields.io/badge/AppVersion-1.7.2-informational?style=flat-square) +![Version: 3.1.1](https://img.shields.io/badge/Version-3.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.7.2](https://img.shields.io/badge/AppVersion-1.7.2-informational?style=flat-square) Host an OpenZiti controller in Kubernetes diff --git 
a/charts/ziti-controller/templates/_helpers.tpl b/charts/ziti-controller/templates/_helpers.tpl index 217e29cf..73db5767 100644 --- a/charts/ziti-controller/templates/_helpers.tpl +++ b/charts/ziti-controller/templates/_helpers.tpl @@ -134,6 +134,18 @@ that are managed by cert-manager {{- dict "certManagerCerts" $filteredCerts | toJson -}} {{- end -}} +{{/* +Resolve the organization used in server certificate subjects: "Enterprise Edition" when .Values.edition.enterprise is set to a truthy value, otherwise "OpenZiti Community" (also when .Values.edition is absent). +*/}} +{{- define "ziti-controller.serverCertSubjectOrganization" -}} + {{- $edition := (get .Values "edition") | default dict -}} + {{- if (get $edition "enterprise" | default false) -}} +Enterprise Edition + {{- else -}} +OpenZiti Community + {{- end -}} +{{- end -}} + {{/* Validate cluster mode. Returns one of: "standalone", "cluster-init", "cluster-join", "cluster-migrate". diff --git a/charts/ziti-controller/templates/alt-certificate.yaml b/charts/ziti-controller/templates/alt-certificate.yaml index 09d55bae..90dfcc70 100644 --- a/charts/ziti-controller/templates/alt-certificate.yaml +++ b/charts/ziti-controller/templates/alt-certificate.yaml @@ -10,6 +10,9 @@ metadata: name: {{ printf "%s-alt-cert-%d" (include "ziti-controller.fullname" $) $index }} namespace: {{ $.Release.Namespace }} spec: + subject: + organizations: + - {{ include "ziti-controller.serverCertSubjectOrganization" $ | quote }} {{- if $cert.secretName }} secretName: {{ $cert.secretName | quote }} {{- else }} diff --git a/charts/ziti-controller/templates/ca-ctrl-identity.yaml b/charts/ziti-controller/templates/ca-ctrl-identity.yaml index e53273c5..316fe86d 100644 --- a/charts/ziti-controller/templates/ca-ctrl-identity.yaml +++ b/charts/ziti-controller/templates/ca-ctrl-identity.yaml @@ -60,6 +60,9 @@ metadata: {{- include "ziti-controller.labels" .
| nindent 4 }} spec: commonName: {{ default (printf "%s-ctrl-plane-identity" (include "ziti-controller.fullname" .)) .Values.cluster.nodeName }} + subject: + organizations: + - {{ include "ziti-controller.serverCertSubjectOrganization" . | quote }} secretName: {{ include "ziti-controller.fullname" . }}-ctrl-plane-identity-secret isCA: false duration: {{ .Values.cert.duration }} diff --git a/charts/ziti-controller/templates/ca-web-identity.yaml b/charts/ziti-controller/templates/ca-web-identity.yaml index 7c7d5e48..a7421433 100644 --- a/charts/ziti-controller/templates/ca-web-identity.yaml +++ b/charts/ziti-controller/templates/ca-web-identity.yaml @@ -64,6 +64,9 @@ metadata: {{- include "ziti-controller.labels" . | nindent 4 }} spec: commonName: {{ include "ziti-controller.fullname" . }}-web-identity + subject: + organizations: + - {{ include "ziti-controller.serverCertSubjectOrganization" . | quote }} secretName: {{ include "ziti-controller.fullname" . }}-web-identity-secret isCA: false duration: {{ .Values.cert.duration }} @@ -115,6 +118,9 @@ metadata: {{- include "ziti-controller.labels" . | nindent 4 }} spec: commonName: {{ include "ziti-controller.fullname" . }}-mgmt + subject: + organizations: + - {{ include "ziti-controller.serverCertSubjectOrganization" . | quote }} secretName: {{ include "ziti-controller.fullname" . }}-web-mgmt-api-secret isCA: false duration: {{ .Values.cert.duration }} @@ -162,6 +168,9 @@ metadata: {{- include "ziti-controller.labels" . | nindent 4 }} spec: commonName: {{ include "ziti-controller.fullname" . }}-prometheus + subject: + organizations: + - {{ include "ziti-controller.serverCertSubjectOrganization" . | quote }} secretName: {{ include "ziti-controller.fullname" . 
}}-web-prometheus-metrics-secret
   isCA: false
   duration: {{ .Values.cert.duration }}
diff --git a/matrix-test.bash b/matrix-test.bash
new file mode 100755
index 00000000..68932cfd
--- /dev/null
+++ b/matrix-test.bash
@@ -0,0 +1,343 @@
+#!/usr/bin/env bash
+# matrix-test.bash — triangulate JWKS stale-cache failure across K8s × ziti versions
+#
+# Context: see ci-failure-analysis-server-cert-subjects.md
+#
+# After a ziti-controller pod restart the controller generates a new ephemeral
+# OIDC JWT signing key. The ziti-router caches the JWKS at startup and does
+# NOT re-fetch it on reconnect, so JWT validation fails with:
+#   "JWT validation failed: public key not found"
+#
+# Dimensions:
+#   K8s minor     : top 3 stable minors (mirrors .github/workflows/miniziti.yml)
+#   ziti version  : VERSIONS array
+#   restart combo : A=none B=ctrl-only D=ctrl+wait C=ctrl+router
+#
+# With MATRIX_PARALLELISM > 1, K8s minor iterations run in parallel; each gets a
+# distinct minikube profile (and ZITI_NAMESPACE) such as "miniziti-1-35" so that
+# minikube profiles and k8s contexts never collide. run-miniziti.bash passes
+# --kube-context "${ZITI_NAMESPACE}" to every helm call, so parallel runs are
+# fully isolated.
+#
+# Usage:
+#   bash matrix-test.bash
+#   VERSIONS="1.6.12 1.7.2" bash matrix-test.bash
+#   K8S_MINORS="1.32" VERSIONS="1.7.2" bash matrix-test.bash   # single cell
+#   MATRIX_PARALLELISM=1 bash matrix-test.bash                 # serial (the default)
+#
+# Environment variables:
+#   VERSIONS                 space-separated ziti image versions (default: 1.6.12 1.7.2 1.8.0-pre5)
+#   K8S_MINORS               space-separated K8s minor versions (default: top 3 stable)
+#   MATRIX_PARALLELISM       K8s-minor workers to run at once; positive integer (default: 1)
+#   WORKER_START_DELAY_SECS  seconds to sleep between launching workers (default: 0)
+#   MINIZITI_TIMEOUT_SECS    forwarded to run-miniziti.bash (default: 600)
+#   UPGRADE_SETTLE_SECS      settle wait after upgrade before combo A (default: 30)
+#   JWKS_WAIT_SECS           timed wait after ctrl restart for combo D (default: 300)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RUNNER="${SCRIPT_DIR}/run-miniziti.bash"
+TIMESTAMP="$(date -u +%Y%m%dT%H%M%SZ)"
+RESULTS_FILE="${SCRIPT_DIR}/matrix-results-${TIMESTAMP}.md"
+RESULT_DIR="$(mktemp -d -t matrix-results-XXXXXX)"
+trap 'rm -rf "${RESULT_DIR}"' EXIT
+
+# ── config ────────────────────────────────────────────────────────────────────
+read -ra VERSIONS <<< "${VERSIONS:-1.6.12 1.7.2 1.8.0-pre5}"
+MATRIX_PARALLELISM="${MATRIX_PARALLELISM:-1}"
+WORKER_START_DELAY_SECS="${WORKER_START_DELAY_SECS:-0}"
+UPGRADE_SETTLE_SECS="${UPGRADE_SETTLE_SECS:-30}"
+JWKS_WAIT_SECS="${JWKS_WAIT_SECS:-300}"
+# A zero MATRIX_PARALLELISM would abort in the worker launch loop with an unrelated
+# "unbound variable" error (set -u on an empty worker_pids), and a non-numeric one
+# breaks that loop's -ge comparison; reject both up front with a clear message.
+if ! [[ "${MATRIX_PARALLELISM}" =~ ^[1-9][0-9]*$ ]]; then
+  printf 'MATRIX_PARALLELISM must be a positive integer (got "%s")\n' "${MATRIX_PARALLELISM}" >&2; exit 2
+fi
+
+# Default timeout is higher than run-miniziti.bash's default to account for
+# parallel resource contention across multiple clusters
+export MINIZITI_TIMEOUT_SECS="${MINIZITI_TIMEOUT_SECS:-600}"
+
+LOCAL_BIN="${HOME}/.local/bin"
+export PATH="${LOCAL_BIN}:${PATH}"
+
+# ── logging ───────────────────────────────────────────────────────────────────
+ts() { date -u '+%H:%M:%S'; }
+log() { printf '\n[matrix %s] %s\n' "$(ts)" "$*"; }
+log_sep() { printf '\n[matrix %s] %s\n' "$(ts)" "═══════════════════════════════════════════════"; }
+
+# ── K8s minor resolution
──────────────────────────────────────────────────────
+compute_k8s_minors() {
+  local stable major minor
+  # Callers use $(...), where errexit is off: fail explicitly instead of printing ".0 .-1 .-2"
+  stable="$(curl -fsSL https://dl.k8s.io/release/stable.txt)" || return 1
+  [[ "${stable}" =~ ^v?([0-9]+)\.([0-9]+)(\.|$) ]] \
+    || { printf 'unexpected stable Kubernetes version: "%s"\n' "${stable}" >&2; return 1; }
+  major="${BASH_REMATCH[1]}" minor="${BASH_REMATCH[2]}"
+  echo "${major}.$((minor)) ${major}.$((minor - 1)) ${major}.$((minor - 2))"
+}
+
+resolve_k8s_patch() {  # $1 = K8s minor, e.g. "1.35"; prints the contents of dl.k8s.io's stable-<minor>.txt
+  curl -fsSL "https://dl.k8s.io/release/stable-${1}.txt"
+}
+
+# ── K8s namespace/profile name from minor ─────────────────────────────────────
+# "1.35" → "miniziti-1-35" (dots replaced with hyphens)
+k8s_ns() { printf 'miniziti-%s' "${1//./-}"; }
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+# Run stages that MUST succeed; relies on caller having exported ZITI_NAMESPACE
+must_run() { bash "${RUNNER}" "$@"; }
+
+# Run a single stage ($1) and write "PASS" or "FAIL" to result_file ($2); never fails itself
+probe_and_record() {
+  local stage="$1" result_file="$2"
+  local exit_code=0
+  bash "${RUNNER}" "${stage}" || exit_code=$?
+  if [[ ${exit_code} -eq 0 ]]; then
+    echo "PASS" > "${result_file}"
+  else
+    printf '[%s] stage "%s" exited %d → FAIL\n' "${ZITI_NAMESPACE}" "${stage}" "${exit_code}" >&2
+    echo "FAIL" > "${result_file}"
+  fi
+}
+
+# ── per-K8s-minor worker (runs in a background subshell) ──────────────────────
+run_k8s_worker() {
+  # Clear the parent's EXIT trap so this background subshell does not delete
+  # RESULT_DIR when it exits (only the parent should do that cleanup).
+  trap - EXIT
+
+  local k8s_minor="$1"
+  local k8s_patch="$2"
+  local ns
+  ns="$(k8s_ns "${k8s_minor}")"
+
+  # Each worker exports its own ZITI_NAMESPACE, KUBERNETES_VERSION, and
+  # TESTVALUES_DIR so that all child bash "${RUNNER}" processes are isolated.
+  export ZITI_NAMESPACE="${ns}"
+  export KUBERNETES_VERSION="${k8s_patch}"
+  export TESTVALUES_DIR="${SCRIPT_DIR}/testvalues/${ns}"
+
+  local pfx="[${ns}]"
+  printf '%s K8s %s (%s) — starting\n' "${pfx}" "${k8s_minor}" "${k8s_patch}"
+
+  for ver in "${VERSIONS[@]}"; do
+    local key="${k8s_minor}__${ver}"
+    local iter_start=${SECONDS}
+    export MINIZITI_VERSION="${ver}"
+    printf '%s ziti %s — deploying fresh cluster [+%ds]\n' "${pfx}" "${ver}" "$((SECONDS - iter_start))"
+
+    must_run clean minikube testvalues baseline zrok upgrade
+
+    # ── settle wait: give router edge-channel time to establish after upgrade
+    printf '%s ziti %s — settle wait %ds after upgrade [+%ds]\n' \
+      "${pfx}" "${ver}" "${UPGRADE_SETTLE_SECS}" "$((SECONDS - iter_start))"
+    sleep "${UPGRADE_SETTLE_SECS}"
+
+    # ── A: no restart ────────────────────────────────────────────────────
+    # Expected: PASS (baseline sanity check)
+    printf '%s ziti %s — combo A (no restart) [+%ds]\n' "${pfx}" "${ver}" "$((SECONDS - iter_start))"
+    probe_and_record proxy-test "${RESULT_DIR}/${key}__no_restart"
+
+    # ── B: ctrl restart only ─────────────────────────────────────────────
+    # Expected: FAIL (stale JWKS — router hasn't refreshed its key cache)
+    printf '%s ziti %s — combo B (restart-ctrl, no wait) [+%ds]\n' "${pfx}" "${ver}" "$((SECONDS - iter_start))"
+    must_run restart-ctrl
+    probe_and_record proxy-test "${RESULT_DIR}/${key}__ctrl_only"
+
+    # ── D: ctrl restart + timed wait ─────────────────────────────────────
+    # Fresh restart so the wait clock starts from zero; tests whether JWKS
+    # cache has a TTL / auto-refresh within JWKS_WAIT_SECS seconds.
+    # Expected: FAIL (confirms no self-healing; router restart is required)
+    printf '%s ziti %s — combo D (restart-ctrl + %ds wait) [+%ds]\n' \
+      "${pfx}" "${ver}" "${JWKS_WAIT_SECS}" "$((SECONDS - iter_start))"
+    must_run restart-ctrl
+    sleep "${JWKS_WAIT_SECS}"
+    probe_and_record proxy-test "${RESULT_DIR}/${key}__ctrl_wait"
+
+    # ── C: ctrl + router restart ─────────────────────────────────────────
+    # Expected: PASS (router re-fetches JWKS on startup → fix confirmed)
+    printf '%s ziti %s — combo C (restart-router) [+%ds]\n' "${pfx}" "${ver}" "$((SECONDS - iter_start))"
+    must_run restart-router
+    probe_and_record proxy-test "${RESULT_DIR}/${key}__ctrl_router"
+
+    printf '%s ziti %s — done [+%ds]\n' "${pfx}" "${ver}" "$((SECONDS - iter_start))"
+  done
+
+  printf '%s K8s %s — all ziti versions complete\n' "${pfx}" "${k8s_minor}"
+}
+
+# ── resolve K8s minors ────────────────────────────────────────────────────────
+declare -a K8S_MINORS_ARR
+k8s_minors_raw="${K8S_MINORS:-}"
+if [[ -z "${k8s_minors_raw}" ]]; then
+  log "Computing top 3 stable Kubernetes minors..."
+  k8s_minors_raw="$(compute_k8s_minors)"  # plain assignment (not a here-string) so set -e aborts on failure
+fi
+read -ra K8S_MINORS_ARR <<< "${k8s_minors_raw}"
+
+log "K8s minors : ${K8S_MINORS_ARR[*]}"
+log "Ziti versions: ${VERSIONS[*]}"
+log "Parallelism : ${MATRIX_PARALLELISM} (stagger: ${WORKER_START_DELAY_SECS}s)"
+
+declare -A K8S_PATCH_MAP
+for m in "${K8S_MINORS_ARR[@]}"; do
+  patch="$(resolve_k8s_patch "${m}")"
+  K8S_PATCH_MAP[${m}]="${patch}"
+  log " ${m} → ${patch}"
+done
+
+# ── install tooling once (serial, shared across workers) ─────────────────────
+log "Installing prerequisites (once)..."
+# prereqs uses ZITI_NAMESPACE only for PATH export; any value is fine here +ZITI_NAMESPACE=miniziti bash "${RUNNER}" prereqs + +# ── launch parallel K8s workers with concurrency cap ───────────────────────── +declare -a worker_pids=() +declare -a worker_minors=() +active=0 + +for k8s_minor in "${K8S_MINORS_ARR[@]}"; do + # Honour concurrency cap: wait for one slot to free up + while [[ ${active} -ge ${MATRIX_PARALLELISM} ]]; do + wait "${worker_pids[0]}" || true # wait for oldest; swallow exit (we record per-probe) + worker_pids=("${worker_pids[@]:1}") + worker_minors=("${worker_minors[@]:1}") + (( active-- )) || true + done + + if [[ ${#worker_pids[@]} -gt 0 && ${WORKER_START_DELAY_SECS} -gt 0 ]]; then + log "Delaying ${WORKER_START_DELAY_SECS}s before launching K8s ${k8s_minor} worker..." + sleep "${WORKER_START_DELAY_SECS}" + fi + log "Launching worker for K8s ${k8s_minor} (${K8S_PATCH_MAP[${k8s_minor}]})" + run_k8s_worker "${k8s_minor}" "${K8S_PATCH_MAP[${k8s_minor}]}" & + worker_pids+=($!) 
+ worker_minors+=("${k8s_minor}") + (( active++ )) || true +done + +# Wait for all remaining workers +for pid in "${worker_pids[@]}"; do + wait "${pid}" || true +done + +log_sep +log "All workers finished — aggregating results" +log_sep + +# ── terminal summary ────────────────────────────────────────────────────────── +local_wait_label="CTRL+${JWKS_WAIT_SECS}s WAIT" +printf '\n %-8s %-14s %-12s %-20s %-24s %-22s\n' \ + "K8S" "ZITI" "NO-RESTART" "CTRL-ONLY" "${local_wait_label}" "CTRL+ROUTER" +printf ' %-8s %-14s %-12s %-20s %-24s %-22s\n' \ + "---" "----" "----------" "---------" "-------------------" "-----------" + +read_result() { + local f="$1" + if [[ -f "${f}" ]]; then cat "${f}"; else echo "N/A"; fi +} + +for k8s_minor in "${K8S_MINORS_ARR[@]}"; do + for ver in "${VERSIONS[@]}"; do + key="${k8s_minor}__${ver}" + printf ' %-8s %-14s %-12s %-20s %-24s %-22s\n' \ + "${k8s_minor}" "${ver}" \ + "$(read_result "${RESULT_DIR}/${key}__no_restart")" \ + "$(read_result "${RESULT_DIR}/${key}__ctrl_only")" \ + "$(read_result "${RESULT_DIR}/${key}__ctrl_wait")" \ + "$(read_result "${RESULT_DIR}/${key}__ctrl_router")" + done +done + +# ── write markdown report ───────────────────────────────────────────────────── +log "Writing results to ${RESULTS_FILE}..." +{ +cat <
/dev/null || echo unknown)
**Commit:** $(git -C "${SCRIPT_DIR}" rev-parse --short HEAD 2>/dev/null || echo unknown)
**K8s minors tested:** ${K8S_MINORS_ARR[*]}
**Ziti versions tested:** ${VERSIONS[*]}
**Parallelism:** ${MATRIX_PARALLELISM}

## Background

After a \`ziti-controller\` pod restart the controller generates a new ephemeral
OIDC JWT signing key (not persisted to any Kubernetes Secret). The
\`ziti-router\` caches the controller's JWKS at startup and does **not** re-fetch
it on reconnect to a restarted controller. Any identity that authenticates
against the restarted controller receives a JWT signed with the new key, which
the router rejects with:

> \`JWT validation failed: public key not found\`

See \`ci-failure-analysis-server-cert-subjects.md\` for full analysis.

## Isolation

Each K8s minor version runs in a **separate minikube profile** (and k8s context)
named \`miniziti-MAJOR-MINOR\` (e.g. \`miniziti-1-35\`). All \`helm\` calls pass
\`--kube-context\` explicitly, so parallel workers never touch each other's clusters.

## K8s Patch Versions Resolved

HEADER

# One bullet per tested minor with the patch version it resolved to.
for m in "${K8S_MINORS_ARR[@]}"; do
  printf -- '- %s → %s\n' "${m}" "${K8S_PATCH_MAP[${m}]}"
done

# Procedure text mirrors what run_k8s_worker does. The settle wait in step 2 is
# UPGRADE_SETTLE_SECS (the value the worker sleeps after the upgrade), while
# the combo D wait is JWKS_WAIT_SECS.
printf '\n## Test Procedure\n\n'
printf 'For each (K8s minor, ziti version) pair:\n'
printf '1. Fresh minikube cluster at that K8s patch + baseline + zrok + upgrade to branch charts\n'
printf '2. Settle wait (%ds) after upgrade to allow router edge-channel to establish\n' "${UPGRADE_SETTLE_SECS}"
printf '3. **Combo A** — `proxy-test` with no restarts (should always PASS)\n'
printf '4. **Combo B** — `restart-ctrl` then `proxy-test` (FAIL expected if stale-JWKS bug present)\n'
printf '5. **Combo D** — fresh `restart-ctrl` then wait %ds then `proxy-test`\n' "${JWKS_WAIT_SECS}"
printf ' (tests whether JWKS cache has a TTL / auto-refresh; FAIL → no self-healing)\n'
printf '6. **Combo C** — `restart-router` then `proxy-test` (PASS expected if router restart is the fix)\n\n'
printf 'Sequence per version: A → restart-ctrl → B → restart-ctrl → sleep(%ds) → D → restart-router → C\n\n' "${JWKS_WAIT_SECS}"
printf '`proxy-test` runs `ziti ops verify traffic` inside the controller container\n'
printf 'via `kubectl exec`, exercising end-to-end traffic through the ziti-router.\n'

# Results table: one row per (K8s minor, ziti version); a verdict file that was
# never written is rendered as N/A by read_result.
printf '\n## Results\n\n'
printf '| K8s | Ziti | No Restart | Ctrl Only | Ctrl+%ds Wait | Ctrl+Router |\n' "${JWKS_WAIT_SECS}"
printf '|-----|------|:----------:|:---------:|:--------------:|:-----------:|\n'

for k8s_minor in "${K8S_MINORS_ARR[@]}"; do
  for ver in "${VERSIONS[@]}"; do
    key="${k8s_minor}__${ver}"
    printf '| %-5s | %-14s | %-10s | %-9s | %-14s | %-11s |\n' \
      "${k8s_minor}" "${ver}" \
      "$(read_result "${RESULT_DIR}/${key}__no_restart")" \
      "$(read_result "${RESULT_DIR}/${key}__ctrl_only")" \
      "$(read_result "${RESULT_DIR}/${key}__ctrl_wait")" \
      "$(read_result "${RESULT_DIR}/${key}__ctrl_router")"
  done
done

cat <