@@ -11,6 +11,13 @@ NAMESPACE="${2:?Missing NAMESPACE argument}"
# Names of the Zookeeper StatefulSet, the container to patch inside it,
# and its first pod (ordinal 0), all built from ${ZENKO_NAME}.
ZK_STS_NAME="${ZENKO_NAME}-base-quorum"
ZK_CONTAINER_NAME="zookeeper"
ZK_POD_NAME="${ZK_STS_NAME}-0"
# Name of the Pravega zookeeper-operator deployment.
# Can be overridden, otherwise we auto-detect.
ZK_OPERATOR_DEPLOYMENT="${ZK_OPERATOR_DEPLOYMENT:-}"

# By default keep the operator scaled down.
# This avoids it reconciling/reverting the JVMFLAGS workaround.
# Any value other than the exact string "true" scales it back up at the end.
ZK_OPERATOR_KEEP_SCALED_DOWN="${ZK_OPERATOR_KEEP_SCALED_DOWN:-true}"

OPERATOR_WAIT_TIMEOUT=120
STATEFULSET_WAIT_TIMEOUT=180
@@ -21,6 +28,45 @@ get_elapsed() {
2128 echo $(( $(date +% s) - start_time))
2229}
2330
#######################################
# Reduce a Kubernetes resource reference to its bare name.
# Accepts either a plain name (foo) or a resource/name form
# (deployment.apps/foo, deployment/foo, deploy/foo).
# Arguments: $1 - resource reference; may be empty or omitted
# Outputs:   the name part (foo) on stdout, or an empty line for empty input
# Returns:   0
#######################################
normalize_k8s_name() {
  local value="${1:-}"
  # Drop everything up to and including the last '/'. A plain name has no
  # '/', so it passes through unchanged. printf (not echo) keeps values that
  # look like echo options, such as "-n", intact.
  printf '%s\n' "${value##*/}"
}
41+
#######################################
# Resolve the name of the zookeeper-operator deployment.
# Globals:   ZK_OPERATOR_DEPLOYMENT (read and written), NAMESPACE (read)
# Arguments: none
# Outputs:   diagnostics on stderr when nothing can be detected
# Returns:   0 with ZK_OPERATOR_DEPLOYMENT set to a bare deployment name;
#            exits the script with status 1 when no deployment is found
#######################################
detect_zk_operator_deployment() {
  # An explicit override wins; only reduce it to a bare name.
  if [[ -n "${ZK_OPERATOR_DEPLOYMENT:-}" ]]; then
    ZK_OPERATOR_DEPLOYMENT="$(normalize_k8s_name "${ZK_OPERATOR_DEPLOYMENT}")"
    return 0
  fi

  # Prefer Helm labels if present (release name is typically 'zk-operator').
  # kubectl errors are deliberately swallowed here: a failed lookup simply
  # leaves the variable empty and triggers the fallback below.
  ZK_OPERATOR_DEPLOYMENT=$(kubectl -n "${NAMESPACE}" get deploy \
    -l app.kubernetes.io/instance=zk-operator \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)

  # Fallback: best-effort name match, keeping the first matching deployment.
  if [[ -z "${ZK_OPERATOR_DEPLOYMENT}" ]]; then
    ZK_OPERATOR_DEPLOYMENT=$(kubectl -n "${NAMESPACE}" get deploy \
      -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null \
      | grep -m 1 -E 'zookeeper-operator|zk-operator' || true)
  fi

  if [[ -z "${ZK_OPERATOR_DEPLOYMENT}" ]]; then
    echo "ERROR: Could not detect zookeeper-operator deployment in namespace ${NAMESPACE}." >&2
    echo "Hint: run 'kubectl -n ${NAMESPACE} get deploy' and set ZK_OPERATOR_DEPLOYMENT explicitly." >&2
    exit 1
  fi

  ZK_OPERATOR_DEPLOYMENT="$(normalize_k8s_name "${ZK_OPERATOR_DEPLOYMENT}")"
}
69+
# Wait for the Zenko operator to process the CR.
# Record when the wait began, in seconds since the epoch.
OPERATOR_WAIT_START=$(date +%s)
2672
@@ -61,8 +107,18 @@ while true; do
61107 sleep 2
62108done
63109
# Detect zk-operator deployment name
detect_zk_operator_deployment

# Patch the StatefulSet with JVM flags to disable container support
# as ubuntu runners now are incompatible with zookeeper.
# We need to scale down the zookeeper operator first, otherwise it will
# reconcile and revert our patch.
echo "Scaling down Zookeeper operator to prevent reconciliation..."
kubectl -n "${NAMESPACE}" scale "deployment/${ZK_OPERATOR_DEPLOYMENT}" --replicas=0
# Best effort: a rollout-status failure or timeout is tolerated on purpose,
# so the script carries on to the patch regardless.
kubectl -n "${NAMESPACE}" rollout status "deployment/${ZK_OPERATOR_DEPLOYMENT}" --timeout=60s || true

echo "Patching StatefulSet with JVMFLAGS..."
66122kubectl -n " ${NAMESPACE} " patch statefulset " ${ZK_STS_NAME} " --type=' strategic' \
67123 -p ' {
68124 "spec": {
@@ -84,14 +140,33 @@ kubectl -n "${NAMESPACE}" patch statefulset "${ZK_STS_NAME}" --type='strategic'
84140 }
85141 }'
86142
# Verify patch stuck (and was not immediately reverted)
echo "Verifying JVMFLAGS is present on StatefulSet template..."
# Read the JVMFLAGS env value of the zookeeper container into a variable
# rather than piping kubectl into `grep -q`: grep may exit early, and if
# pipefail is enabled that can turn a successful match into a failure.
# A failed lookup is treated the same as a missing flag.
jvmflags=$(kubectl -n "${NAMESPACE}" get statefulset "${ZK_STS_NAME}" \
  -o jsonpath='{.spec.template.spec.containers[?(@.name=="'"${ZK_CONTAINER_NAME}"'")].env[?(@.name=="JVMFLAGS")].value}') \
  || jvmflags=""
if [[ "${jvmflags}" != *"-XX:-UseContainerSupport"* ]]; then
  echo "ERROR: JVMFLAGS patch did not apply (or was reverted)." >&2
  # Dump the first 200 lines of the StatefulSet for debugging (best effort).
  kubectl -n "${NAMESPACE}" get statefulset "${ZK_STS_NAME}" -o yaml | sed -n '1,200p' >&2 || true
  exit 1
fi
152+
87153
# Delete the pod to apply the patch
kubectl delete pod "${ZK_POD_NAME}" -n "${NAMESPACE}" --ignore-not-found=true --wait=false

# Wait for the pod to become Ready
if ! kubectl wait --for=condition=Ready "pod/${ZK_POD_NAME}" --timeout=300s -n "${NAMESPACE}"; then
  echo "ERROR: Zookeeper pod ${ZK_POD_NAME} failed to become Ready after patching." >&2
  # Scale operator back up before exiting. A failure here is reported but
  # must not mask the original error, so the exit below always runs.
  kubectl -n "${NAMESPACE}" scale "deployment/${ZK_OPERATOR_DEPLOYMENT}" --replicas=1 \
    || echo "WARNING: Could not scale deployment/${ZK_OPERATOR_DEPLOYMENT} back up." >&2
  exit 1
fi
96164
# Restore the operator unless asked to keep it down. Only the exact value
# "true" keeps it scaled down; anything else scales it back to one replica.
if [[ "${ZK_OPERATOR_KEEP_SCALED_DOWN}" != "true" ]]; then
  echo "Scaling Zookeeper operator back up..."
  kubectl -n "${NAMESPACE}" scale "deployment/${ZK_OPERATOR_DEPLOYMENT}" --replicas=1
else
  echo "Leaving Zookeeper operator scaled down (ZK_OPERATOR_KEEP_SCALED_DOWN=true)."
fi

echo "Zookeeper fix applied successfully."
0 commit comments