@@ -30,6 +30,7 @@ import (
3030 "gvisor.dev/gvisor/pkg/cleanup"
3131 "gvisor.dev/gvisor/pkg/test/testutil"
3232 "gvisor.dev/gvisor/runsc/config"
33+ "gvisor.dev/gvisor/runsc/sandbox"
3334 "gvisor.dev/gvisor/test/metricclient"
3435)
3536
@@ -910,3 +911,107 @@ func TestMetricServerDoesNotExportZeroValueCounters(t *testing.T) {
910911 t.Logf("Last metric data:\n\n%s\n\n", metricData)
911912 }
912913}
914+
915+ func TestContainerMetricsAfterRestore(t *testing.T) {
916+ te, cleanup := setupMetrics(t /* forceTempUDS= */, false)
917+ defer cleanup()
918+
919+ imagePath, err := os.MkdirTemp(testutil.TmpDir(), "checkpoint")
920+ if err != nil {
921+ t.Fatalf("os.MkdirTemp failed: %v", err)
922+ }
923+ defer os.RemoveAll(imagePath)
924+
925+ containerID := testutil.RandomContainerID()
926+ args := Args{
927+ ID: containerID,
928+ Spec: te.sleepSpec,
929+ BundleDir: te.bundleDir,
930+ }
931+ cont, err := New(te.sleepConf, args)
932+ if err != nil {
933+ t.Fatalf("error creating container: %v", err)
934+ }
935+
936+ if err := cont.Start(te.sleepConf); err != nil {
937+ t.Fatalf("Cannot start container: %v", err)
938+ }
939+
940+ if err := cont.Checkpoint(te.sleepConf, imagePath, sandbox.CheckpointOpts{Resume: true}); err != nil {
941+ t.Fatalf("Checkpoint failed: %v", err)
942+ }
943+
944+ // Check time saved metrics are zero before restore.
945+ ckpData, err := te.client.GetMetrics(te.testCtx, nil)
946+ if err != nil {
947+ t.Fatalf("Cannot get metrics after restoring container: %v", err)
948+ }
949+ cpuTimeSavedBefore, _, err := ckpData.GetPrometheusContainerInteger(metricclient.WantMetric{
950+ Metric: "testmetric_meta_sandbox_cpu_time_saved",
951+ Sandbox: containerID,
952+ })
953+ if err != nil {
954+ t.Errorf("Cannot get testmetric_meta_sandbox_cpu_time_saved from following data (err: %v):\n\n%s\n\n", err, ckpData)
955+ }
956+ if cpuTimeSavedBefore != 0 {
957+ t.Errorf("testmetric_meta_sandbox_cpu_time_saved should be zero, got %v", cpuTimeSavedBefore)
958+ }
959+ wallTimeSavedBefore, _, err := ckpData.GetPrometheusContainerInteger(metricclient.WantMetric{
960+ Metric: "testmetric_meta_sandbox_wall_time_saved",
961+ Sandbox: containerID,
962+ })
963+ if err != nil {
964+ t.Errorf("Cannot get testmetric_meta_sandbox_wall_time_saved from following data (err: %v):\n\n%s\n\n", err, ckpData)
965+ }
966+ if wallTimeSavedBefore != 0 {
967+ t.Errorf("testmetric_meta_sandbox_wall_time_saved should be zero, got %v", wallTimeSavedBefore)
968+ }
969+
970+ if err := cont.Destroy(); err != nil {
971+ t.Fatalf("Destroy failed: %v", err)
972+ }
973+
974+ // Create new container to restore into.
975+ restoreCont, err := New(te.sleepConf, args)
976+ if err != nil {
977+ t.Fatalf("error creating container for restore: %v", err)
978+ }
979+ defer restoreCont.Destroy()
980+
981+ if err := restoreCont.Restore(te.sleepConf, imagePath, false, false); err != nil {
982+ t.Fatalf("Restore failed: %v", err)
983+ }
984+
985+ var cpuTimeSaved, wallTimeSaved int64
986+ err = testutil.Poll(func() error {
987+ restoreData, err := te.client.GetMetrics(te.testCtx, nil)
988+ if err != nil {
989+ return fmt.Errorf("cannot get metrics after restoring container: %v", err)
990+ }
991+
992+ // Check time saved metrics are non-zero after restore.
993+ cpuTimeSaved, _, err = restoreData.GetPrometheusContainerInteger(metricclient.WantMetric{
994+ Metric: "testmetric_meta_sandbox_cpu_time_saved",
995+ Sandbox: containerID,
996+ })
997+ if err != nil {
998+ return fmt.Errorf("cannot get testmetric_meta_sandbox_cpu_time_saved from following data (err: %v):\n%s", err, restoreData)
999+ }
1000+ wallTimeSaved, _, err = restoreData.GetPrometheusContainerInteger(metricclient.WantMetric{
1001+ Metric: "testmetric_meta_sandbox_wall_time_saved",
1002+ Sandbox: containerID,
1003+ })
1004+ if err != nil {
1005+ return fmt.Errorf("cannot get testmetric_meta_sandbox_wall_time_saved from following data (err: %v):\n%s", err, restoreData)
1006+ }
1007+ if cpuTimeSaved == 0 && wallTimeSaved == 0 {
1008+ return fmt.Errorf("time saved metrics are zero: cpu: %d, wall: %d", cpuTimeSaved, wallTimeSaved)
1009+ }
1010+ return nil
1011+ }, 500*time.Millisecond)
1012+ if err != nil {
1013+ t.Fatalf("Failed to get non-zero time saved metrics: %v", err)
1014+ }
1015+ t.Logf("After restore, cpu_time_saved=%d", cpuTimeSaved)
1016+ t.Logf("After restore, wall_time_saved=%d", wallTimeSaved)
1017+ }
0 commit comments