@@ -12,6 +12,8 @@ ZK_STS_NAME="${ZENKO_NAME}-base-quorum"
 ZK_CONTAINER_NAME="zookeeper"
 ZK_POD_NAME="${ZK_STS_NAME}-0"
 
+ZK_JVMFLAGS="-Xmx512m -Xms512m -XX:-UseContainerSupport -XX:ActiveProcessorCount=1 -Djava.awt.headless=true -Dzookeeper.log.dir=/data/logs -Dzookeeper.root.logger=INFO,CONSOLE -Dlog4j.configuration=file:/data/conf/log4j.properties"
+
 OPERATOR_WAIT_TIMEOUT=120
 STATEFULSET_WAIT_TIMEOUT=180
 
@@ -61,37 +63,83 @@ while true; do
     sleep 2
 done
 
-# Patch the StatefulSet with JVM flags to disable container support
-# as ubuntu runners now are incompatible with zookeeper.
-kubectl -n "${NAMESPACE}" patch statefulset "${ZK_STS_NAME}" --type='strategic' \
+# IMPORTANT:
+# The zookeeper-operator reconciles the StatefulSet from the ZookeeperCluster CR.
+# Patching the StatefulSet directly is often reverted immediately.
+# Patch the ZookeeperCluster spec instead so it persists (CI + lab).
+echo "Waiting for ZookeeperCluster (${ZK_STS_NAME}) to be created by operator..."
+ZKCLUSTER_WAIT_START=$(date +%s)
+while true; do
+    ELAPSED=$(get_elapsed ${ZKCLUSTER_WAIT_START})
+    if [ ${ELAPSED} -ge ${STATEFULSET_WAIT_TIMEOUT} ]; then
+        echo "ERROR: Timed out after ${STATEFULSET_WAIT_TIMEOUT}s waiting for ZookeeperCluster ${ZK_STS_NAME}."
+        kubectl -n "${NAMESPACE}" get zookeeperclusters.zookeeper.pravega.io -o wide 2>/dev/null || true
+        exit 1
+    fi
+    if kubectl -n "${NAMESPACE}" get zookeepercluster "${ZK_STS_NAME}" >/dev/null 2>&1; then
+        break
+    fi
+    sleep 2
+done
+
+echo "Patching ZookeeperCluster with JVMFLAGS workaround..."
+kubectl -n "${NAMESPACE}" patch zookeepercluster "${ZK_STS_NAME}" --type='merge' \
     -p '{
       "spec": {
-        "template": {
-          "spec": {
-            "containers": [
+        "containers": [
+          {
+            "name": "'"${ZK_CONTAINER_NAME}"'",
+            "env": [
               {
-                "name": "'"${ZK_CONTAINER_NAME}"'",
-                "env": [
-                  {
-                    "name": "JVMFLAGS",
-                    "value": "-Xmx512m -Xms512m -XX:-UseContainerSupport -XX:ActiveProcessorCount=1 -Djava.awt.headless=true -Dzookeeper.log.dir=/data/logs -Dzookeeper.root.logger=INFO,CONSOLE -Dlog4j.configuration=file:/data/conf/log4j.properties"
-                  }
-                ]
+                "name": "JVMFLAGS",
+                "value": "'"${ZK_JVMFLAGS}"'"
               }
             ]
           }
-        }
+        ]
       }
     }'
 
+echo "Verifying JVMFLAGS is present on ZookeeperCluster spec..."
+if ! kubectl -n "${NAMESPACE}" get zookeepercluster "${ZK_STS_NAME}" \
+    -o jsonpath='{.spec.containers[?(@.name=="'"${ZK_CONTAINER_NAME}"'")].env[?(@.name=="JVMFLAGS")].value}{"\n"}' \
+    | grep -q -- "-XX:-UseContainerSupport"; then
+    echo "ERROR: JVMFLAGS not present on ZookeeperCluster spec after patch." >&2
+    kubectl -n "${NAMESPACE}" get zookeepercluster "${ZK_STS_NAME}" -o yaml | sed -n '1,220p' >&2 || true
+    exit 1
+fi
+
 
 # Delete the pod to apply the patch
 kubectl delete pod "${ZK_POD_NAME}" -n "${NAMESPACE}" --ignore-not-found=true --wait=false
 
-# Wait for the pod to become Ready
-if ! kubectl wait --for=condition=Ready "pod/${ZK_POD_NAME}" --timeout=300s -n "${NAMESPACE}"; then
-    echo "ERROR: Zookeeper pod ${ZK_POD_NAME} failed to become Ready after patching."
+# Wait for the StatefulSet to recreate the pod, then for it to become Ready.
+POD_WAIT_TIMEOUT=300
+POD_WAIT_START=$(date +%s)
+
+while true; do
+    ELAPSED=$(get_elapsed ${POD_WAIT_START})
+    if [ ${ELAPSED} -ge ${POD_WAIT_TIMEOUT} ]; then
+        echo "ERROR: Timed out after ${POD_WAIT_TIMEOUT}s waiting for ${ZK_POD_NAME} to be recreated and become Ready." >&2
+        echo "--- StatefulSet status ---" >&2
+        kubectl -n "${NAMESPACE}" get sts "${ZK_STS_NAME}" -o wide >&2 || true
+        kubectl -n "${NAMESPACE}" describe sts "${ZK_STS_NAME}" | sed -n '1,200p' >&2 || true
+        echo "--- Pod list (matching quorum) ---" >&2
+        kubectl -n "${NAMESPACE}" get pods -o wide | grep "${ZK_STS_NAME}" >&2 || true
+        echo "--- Zookeeper pod describe ---" >&2
+        kubectl -n "${NAMESPACE}" describe pod "${ZK_POD_NAME}" >&2 || true
+        echo "--- Zookeeper pod logs (tail) ---" >&2
+        kubectl -n "${NAMESPACE}" logs "${ZK_POD_NAME}" --tail=120 >&2 || true
     exit 1
-fi
+    fi
+
+    if kubectl -n "${NAMESPACE}" get pod "${ZK_POD_NAME}" >/dev/null 2>&1; then
+        if kubectl -n "${NAMESPACE}" wait --for=condition=Ready "pod/${ZK_POD_NAME}" --timeout=10s >/dev/null 2>&1; then
+            break
+        fi
+    fi
+
+    sleep 2
+done
 
 echo "Zookeeper fix applied successfully."
0 commit comments