Skip to content

Commit baa93bd

Browse files
refactor: add retry workaround for image builds (#413)
1 parent 8a674f5 commit baa93bd

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

internal/generator/generator.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ func NewGenerator(
374374

375375
// get any variables from the API here that could be used to influence a build or services within the environment
376376
// collect docker buildkit value
377-
dockerBuildKit, _ := lagoon.GetLagoonVariable("DOCKER_BUILDKIT", []string{"build"}, buildValues.EnvironmentVariables)
377+
dockerBuildKit, _ := lagoon.GetLagoonVariable("DOCKER_BUILDKIT", []string{"build", "global"}, buildValues.EnvironmentVariables)
378378
if dockerBuildKit != nil {
379379
bk, _ := strconv.ParseBool(dockerBuildKit.Value)
380380
buildValues.DockerBuildKit = &bk

legacy/build-deploy-docker-compose.sh

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -689,19 +689,50 @@ if [[ "$BUILD_TYPE" == "pullrequest" || "$BUILD_TYPE" == "branch" ]]; then
689689
# determine if buildkit should be disabled for this build
690690
DOCKER_BUILDKIT=1
691691
if [ "$(echo "${ENVIRONMENT_IMAGE_BUILD_DATA}" | jq -r '.buildKit')" == "false" ]; then
692-
DOCKER_BUILDKIT=0
693-
echo "Not using BuildKit for $DOCKERFILE"
692+
DOCKER_BUILDKIT=0
693+
echo "Not using BuildKit for $DOCKERFILE"
694694
else
695-
echo "Using BuildKit for $DOCKERFILE"
695+
echo "Using BuildKit for $DOCKERFILE"
696696
fi
697-
698-
# now do the actual image build
697+
export DOCKER_BUILDKIT
698+
BUILD_TARGET_ARGS=""
699699
if [ $BUILD_TARGET == "false" ]; then
700-
echo "Building ${BUILD_CONTEXT}/${DOCKERFILE}"
701-
DOCKER_BUILDKIT=$DOCKER_BUILDKIT docker build --network=host "${BUILD_ARGS[@]}" -t $TEMPORARY_IMAGE_NAME -f $BUILD_CONTEXT/$DOCKERFILE $BUILD_CONTEXT
700+
echo "Building ${BUILD_CONTEXT}/${DOCKERFILE}"
702701
else
703-
echo "Building target ${BUILD_TARGET} for ${BUILD_CONTEXT}/${DOCKERFILE}"
704-
DOCKER_BUILDKIT=$DOCKER_BUILDKIT docker build --network=host "${BUILD_ARGS[@]}" -t $TEMPORARY_IMAGE_NAME -f $BUILD_CONTEXT/$DOCKERFILE --target $BUILD_TARGET $BUILD_CONTEXT
702+
echo "Building target ${BUILD_TARGET} for ${BUILD_CONTEXT}/${DOCKERFILE}"
703+
BUILD_TARGET_ARGS="--target ${BUILD_TARGET}"
704+
fi
705+
# now do the actual image build, this pipes to tee so that the build output is still realtime in any logs
706+
# ie, if someone was looking at the build container logs in k8s
707+
# this also captures any errors that this command will encounter so that the process can then check the output file to see if the
708+
# error condition we are looking for is there
709+
set +e
710+
(docker build --network=host "${BUILD_ARGS[@]}" -t $TEMPORARY_IMAGE_NAME -f $BUILD_CONTEXT/$DOCKERFILE $BUILD_TARGET_ARGS $BUILD_CONTEXT 2>&1 | tee /kubectl-build-deploy/log-$TEMPORARY_IMAGE_NAME; exit ${PIPESTATUS[0]})
711+
buildExit=$?
712+
set -e
713+
if [ "${buildExit}" != "0" ]; then
714+
# if the build errors and contains the message we are looking for, then it is probably a buildkit related failure
715+
# attempt to run again with --no-cache so that it forces layer invalidation. this will make the build slower, but hopefully succeed
716+
# why this happens is still to be determined. there isn't enough information in the error to be able to know which layers are the problem
717+
# or what the actual cause is, making it incredibly difficult to reproduce
718+
# without being able to reproduce we have to use this workaround to retry :'(
719+
capErr=0
720+
if cat /kubectl-build-deploy/log-$TEMPORARY_IMAGE_NAME | grep -q "ERROR: failed to solve: layer does not exist"; then
721+
capErr=1
722+
elif cat /kubectl-build-deploy/log-$TEMPORARY_IMAGE_NAME | grep -q "ERROR: failed to solve: failed to prepare"; then
723+
capErr=1
724+
fi
725+
if [ "${capErr}" != "0" ]; then
726+
# at least drop a message saying that this was encountered
727+
echo "##############################################
728+
The first attempt to build ${BUILD_CONTEXT}/${DOCKERFILE} failed due to a layer error
729+
Retrying build for ${BUILD_CONTEXT}/${DOCKERFILE} without cache
730+
##############################################"
731+
docker build --no-cache --network=host "${BUILD_ARGS[@]}" -t $TEMPORARY_IMAGE_NAME -f $BUILD_CONTEXT/$DOCKERFILE $BUILD_TARGET_ARGS $BUILD_CONTEXT
732+
else
733+
# if the failure is not one that matches the buildkit layer issue, then exit as a normal build failure
734+
exit 1
735+
fi
705736
fi
706737

707738
# Keep a list of the images we have built, as we need to push them to the registry later

0 commit comments

Comments
 (0)