@@ -261,6 +261,238 @@ var _ = Describe("EphemeralRunner", func() {
261261 ).Should (BeTrue (), "Ephemeral runner should eventually be deleted" )
262262 })
263263
264+ It ("It should delete ephemeral runner when pod failed before runner state is recorded and job assigned" , func () {
265+ er := new (v1alpha1.EphemeralRunner )
266+ Eventually (func () error {
267+ return k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, er )
268+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (Succeed (), "failed to get ephemeral runner" )
269+
270+ er .Status .JobID = "1"
271+ err := k8sClient .Status ().Update (ctx , er )
272+ Expect (err ).To (BeNil (), "failed to update ephemeral runner status" )
273+
274+ Eventually (func () (string , error ) {
275+ current := new (v1alpha1.EphemeralRunner )
276+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, current ); err != nil {
277+ return "" , err
278+ }
279+ return current .Status .JobID , nil
280+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (BeEquivalentTo ("1" ))
281+
282+ pod := new (corev1.Pod )
283+ Eventually (func () (bool , error ) {
284+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, pod ); err != nil {
285+ return false , err
286+ }
287+ return true , nil
288+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (BeEquivalentTo (true ))
289+
290+ pod .Status .Phase = corev1 .PodFailed
291+ pod .Status .ContainerStatuses = nil
292+ err = k8sClient .Status ().Update (ctx , pod )
293+ Expect (err ).To (BeNil (), "Failed to update pod status" )
294+
295+ Eventually (func () bool {
296+ check := new (v1alpha1.EphemeralRunner )
297+ err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, check )
298+ return kerrors .IsNotFound (err )
299+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (BeTrue (), "Ephemeral runner should eventually be deleted" )
300+ })
301+
302+ It ("It should delete ephemeral runner when pod failed before runner state is recorded and job not assigned" , func () {
303+ pod := new (corev1.Pod )
304+ Eventually (func () (bool , error ) {
305+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, pod ); err != nil {
306+ return false , err
307+ }
308+ return true , nil
309+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (BeEquivalentTo (true ))
310+
311+ oldPodUID := pod .UID
312+
313+ pod .Status .Phase = corev1 .PodFailed
314+ pod .Status .ContainerStatuses = nil
315+ err := k8sClient .Status ().Update (ctx , pod )
316+ Expect (err ).To (BeNil (), "Failed to update pod status" )
317+
318+ Eventually (
319+ func () (int , error ) {
320+ updated := new (v1alpha1.EphemeralRunner )
321+ err := k8sClient .Get (
322+ ctx ,
323+ client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace },
324+ updated ,
325+ )
326+ if err != nil {
327+ return 0 , err
328+ }
329+ return len (updated .Status .Failures ), nil
330+ },
331+ ephemeralRunnerTimeout ,
332+ ephemeralRunnerInterval ,
333+ ).Should (BeEquivalentTo (1 ))
334+
335+ Eventually (
336+ func () (bool , error ) {
337+ newPod := new (corev1.Pod )
338+ err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, newPod )
339+ if err != nil {
340+ return false , err
341+ }
342+ return newPod .UID != oldPodUID , nil
343+ },
344+ ephemeralRunnerTimeout ,
345+ ephemeralRunnerInterval ,
346+ ).Should (BeTrue (), "Pod should be re-created" )
347+ })
348+
349+ It ("It should treat pod failed with runner container exit 0 as success with job id" , func () {
350+ er := new (v1alpha1.EphemeralRunner )
351+ Eventually (func () error {
352+ return k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, er )
353+ }, ephemeralRunnerTimeout , ephemeralRunnerInterval ).Should (Succeed (), "failed to get ephemeral runner" )
354+
355+ er .Status .JobID = "1"
356+ err := k8sClient .Status ().Update (ctx , er )
357+ Expect (err ).To (BeNil (), "failed to update ephemeral runner status" )
358+
359+ pod := new (corev1.Pod )
360+ Eventually (
361+ func () error {
362+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, pod ); err != nil {
363+ return err
364+ }
365+ return nil
366+ },
367+ ephemeralRunnerTimeout ,
368+ ephemeralRunnerInterval ,
369+ ).Should (Succeed (), "failed to get pod" )
370+
371+ pod .Status .Phase = corev1 .PodFailed
372+ pod .Status .ContainerStatuses = append (pod .Status .ContainerStatuses , corev1.ContainerStatus {
373+ Name : v1alpha1 .EphemeralRunnerContainerName ,
374+ State : corev1.ContainerState {
375+ Terminated : & corev1.ContainerStateTerminated {
376+ ExitCode : 0 ,
377+ },
378+ },
379+ })
380+ err = k8sClient .Status ().Update (ctx , pod )
381+ Expect (err ).To (BeNil (), "Failed to update pod status" )
382+
383+ Eventually (
384+ func () bool {
385+ check := new (v1alpha1.EphemeralRunner )
386+ err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, check )
387+ return kerrors .IsNotFound (err )
388+ },
389+ ephemeralRunnerTimeout ,
390+ ephemeralRunnerInterval ,
391+ ).Should (BeTrue (), "Ephemeral runner should eventually be deleted" )
392+ })
393+
394+ It ("It should treat pod failed with runner container exit 0 as success with no job id" , func () {
395+ pod := new (corev1.Pod )
396+ Eventually (
397+ func () error {
398+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, pod ); err != nil {
399+ return err
400+ }
401+ return nil
402+ },
403+ ephemeralRunnerTimeout ,
404+ ephemeralRunnerInterval ,
405+ ).Should (Succeed (), "failed to get pod" )
406+
407+ pod .Status .Phase = corev1 .PodFailed
408+ pod .Status .ContainerStatuses = append (pod .Status .ContainerStatuses , corev1.ContainerStatus {
409+ Name : v1alpha1 .EphemeralRunnerContainerName ,
410+ State : corev1.ContainerState {
411+ Terminated : & corev1.ContainerStateTerminated {
412+ ExitCode : 0 ,
413+ },
414+ },
415+ })
416+ err := k8sClient .Status ().Update (ctx , pod )
417+ Expect (err ).To (BeNil (), "Failed to update pod status" )
418+
419+ Eventually (
420+ func () bool {
421+ check := new (v1alpha1.EphemeralRunner )
422+ err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, check )
423+ return kerrors .IsNotFound (err )
424+ },
425+ ephemeralRunnerTimeout ,
426+ ephemeralRunnerInterval ,
427+ ).Should (BeTrue (), "Ephemeral runner should eventually be deleted" )
428+ })
429+
430+ It ("It should mark as failed when job is not assigned and pod is failed" , func () {
431+ er := new (v1alpha1.EphemeralRunner )
432+ Eventually (func () error {
433+ return k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, er )
434+ },
435+ ephemeralRunnerTimeout ,
436+ ephemeralRunnerInterval ,
437+ ).Should (Succeed (), "failed to get ephemeral runner" )
438+
439+ pod := new (corev1.Pod )
440+ Eventually (
441+ func () error {
442+ if err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, pod ); err != nil {
443+ return err
444+ }
445+ return nil
446+ },
447+ ephemeralRunnerTimeout ,
448+ ephemeralRunnerInterval ,
449+ ).Should (Succeed (), "failed to get pod" )
450+
451+ pod .Status .Phase = corev1 .PodFailed
452+ oldPodUID := pod .UID
453+ pod .Status .ContainerStatuses = append (pod .Status .ContainerStatuses , corev1.ContainerStatus {
454+ Name : v1alpha1 .EphemeralRunnerContainerName ,
455+ State : corev1.ContainerState {
456+ Terminated : & corev1.ContainerStateTerminated {
457+ ExitCode : 1 ,
458+ },
459+ },
460+ })
461+
462+ err := k8sClient .Status ().Update (ctx , pod )
463+ Expect (err ).To (BeNil (), "Failed to update pod status" )
464+
465+ Eventually (
466+ func () (int , error ) {
467+ updated := new (v1alpha1.EphemeralRunner )
468+ err := k8sClient .Get (
469+ ctx ,
470+ client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace },
471+ updated ,
472+ )
473+ if err != nil {
474+ return 0 , err
475+ }
476+ return len (updated .Status .Failures ), nil
477+ },
478+ ephemeralRunnerTimeout ,
479+ ephemeralRunnerInterval ,
480+ ).Should (BeEquivalentTo (1 ))
481+
482+ Eventually (
483+ func () (bool , error ) {
484+ newPod := new (corev1.Pod )
485+ err := k8sClient .Get (ctx , client.ObjectKey {Name : ephemeralRunner .Name , Namespace : ephemeralRunner .Namespace }, newPod )
486+ if err != nil {
487+ return false , err
488+ }
489+ return newPod .UID != oldPodUID , nil
490+ },
491+ ephemeralRunnerTimeout ,
492+ ephemeralRunnerInterval ,
493+ ).Should (BeTrue (), "Pod should be re-created" )
494+ })
495+
264496 It ("It should failed if a pod template is invalid" , func () {
265497 invalideEphemeralRunner := newExampleRunner ("invalid-ephemeral-runner" , autoscalingNS .Name , configSecret .Name )
266498 invalideEphemeralRunner .Spec .Spec .PriorityClassName = "notexist"
0 commit comments