scheduler: add diagnostic messages for SCHED_DEADLINE#1966
scheduler: add diagnostic messages for SCHED_DEADLINE#1966giuseppe wants to merge 1 commit intocontainers:mainfrom
Conversation
|
TMT tests failed. @containers/packit-build please check. |
src/libcrun/scheduler.c
Outdated
| return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` runtime must be greater than 0"); | ||
| if (attr->sched_deadline == 0) | ||
| return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` deadline must be greater than 0"); | ||
| if (attr->sched_period == 0) |
There was a problem hiding this comment.
sched(7) man page (when describing SCHED_DEADLINE) says:
If sched_period is specified as 0, then it is made the same as sched_deadline.
so maybe this check should be removed (as it may return a false warning and hide subsequent checks).
src/libcrun/scheduler.c
Outdated
| if (attr->sched_runtime > attr->sched_deadline) | ||
| return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` runtime (%" PRIu64 ") must be <= deadline (%" PRIu64 ")", | ||
| attr->sched_runtime, attr->sched_deadline); | ||
| if (attr->sched_deadline > attr->sched_period) |
There was a problem hiding this comment.
In the view of the above, this should be changed to
if (attr->sched_period != 0 && attr->sched_deadline > attr->sched_period)| if (attr->sched_deadline > attr->sched_period) | ||
| return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` deadline (%" PRIu64 ") must be <= period (%" PRIu64 ")", | ||
| attr->sched_deadline, attr->sched_period); | ||
|
|
There was a problem hiding this comment.
sched(7) also says
In addition, under the current implementation, all of the parameter values must be at least 1024 (i.e., just over one microsecond, which is the resolution of the implementation), and less than 2^63.
so perhaps it makes sense to add these checks, too (given that we've already checked for other issues and we still have EINVAL to deal with).
tests/test_scheduler.py
Outdated
| except Exception as e: | ||
| error_str = str(e).lower() | ||
| if "deadline" in error_str or "scheduler" in error_str: | ||
| return (77, "SCHED_DEADLINE not available") |
There was a problem hiding this comment.
I think this code is wrong. In most other tests Exception means the test failed, plus we check for all preconditions beforehand.
tests/test_scheduler.py
Outdated
| if "scheduler" in output.lower() or "deadline" in output.lower(): | ||
| return (77, "SCHED_DEADLINE not available") |
There was a problem hiding this comment.
I think this here means that if we see a word "scheduler" anywhere in the output, we skip the test instead of failing it, and it's a wrong thing to do.
Found this when I added more tests via copy/paste and they were all "skipped" instead of failing.
There was a problem hiding this comment.
There are a few places like this, I fixed those for this particular PR but there are are some more.
|
Here's the diff with my changes based on the above comments: diff --git a/src/libcrun/scheduler.c b/src/libcrun/scheduler.c
index 8c647b36..a229bb77 100644
--- a/src/libcrun/scheduler.c
+++ b/src/libcrun/scheduler.c
@@ -111,16 +111,30 @@ diagnose_scheduler_failure (int ret, libcrun_error_t *err, runtime_spec_schema_c
return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` runtime must be greater than 0");
if (attr->sched_deadline == 0)
return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` deadline must be greater than 0");
- if (attr->sched_period == 0)
- return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` period must be greater than 0");
+ /* Per sched(7), ff sched_period is specified as 0, then it is made the same as sched_deadline. */
if (attr->sched_runtime > attr->sched_deadline)
return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` runtime (%" PRIu64 ") must be <= deadline (%" PRIu64 ")",
attr->sched_runtime, attr->sched_deadline);
- if (attr->sched_deadline > attr->sched_period)
+ if (attr->sched_period != 0 && attr->sched_deadline > attr->sched_period)
return crun_make_error (err, errno, "sched_setattr: `SCHED_DEADLINE` deadline (%" PRIu64 ") must be <= period (%" PRIu64 ")",
attr->sched_deadline, attr->sched_period);
+ /* sched(7) says "under the current implementation, all of the parameter values
+ * must be at least 1024 <...> and less than 2^63". */
+ const uint64_t min = 1024;
+ const uint64_t max = 1ULL << 63;
+
+ if (attr->sched_runtime < min || attr->sched_runtime > max)
+ return crun_make_error(err, errno, "sched_setattr: `SCHED_DEADLINE` runtime (%" PRIu64 ") must be between %" PRIu64 " and %" PRIu64,
+ attr->sched_runtime, min, max);
+ if (attr->sched_deadline < min || attr->sched_deadline > max)
+ return crun_make_error(err, errno, "sched_setattr: `SCHED_DEADLINE` deadline (%" PRIu64 ") must be between %" PRIu64 " and %" PRIu64,
+ attr->sched_deadline, min, max);
+ if (attr->sched_period != 0 && (attr->sched_period < min || attr->sched_period > max))
+ return crun_make_error(err, errno, "sched_setattr: `SCHED_DEADLINE` period (%" PRIu64 ") must be between %" PRIu64 " and %" PRIu64,
+ attr->sched_period, min, max);
+
return crun_make_error (err, errno, "sched_setattr: invalid `SCHED_DEADLINE` parameters (runtime=%" PRIu64 ", deadline=%" PRIu64 ", period=%" PRIu64 ")",
attr->sched_runtime, attr->sched_deadline, attr->sched_period);
}
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
index 87473ee8..6e4dba61 100755
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -218,16 +218,9 @@ def test_scheduler_deadline():
return 0
except subprocess.CalledProcessError as e:
- output = e.output.decode('utf-8', errors='ignore') if e.output else ''
- # SCHED_DEADLINE often not available
- if "scheduler" in output.lower() or "permission" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("test failed: %s", e)
return -1
except Exception as e:
- error_str = str(e).lower()
- if "deadline" in error_str or "scheduler" in error_str:
- return (77, "SCHED_DEADLINE not available")
logger.info("test failed: %s", e)
return -1
@@ -257,15 +250,9 @@ def test_scheduler_deadline_no_period():
return 0 # Should succeed
except subprocess.CalledProcessError as e:
- output = e.output.decode('utf-8', errors='ignore') if e.output else ''
- if "scheduler" in output.lower() or "permission" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("test failed: %s", e)
return -1
except Exception as e:
- error_str = str(e).lower()
- if "deadline" in error_str or "scheduler" in error_str:
- return (77, "SCHED_DEADLINE not available")
logger.info("test failed: %s", e)
return -1
@@ -292,9 +279,6 @@ def test_scheduler_flags():
return 0
except subprocess.CalledProcessError as e:
- output = e.output.decode('utf-8', errors='ignore') if e.output else ''
- if "scheduler" in output.lower() or "permission" in output.lower():
- return (77, "scheduler flags not available")
logger.info("test failed: %s", e)
return -1
except Exception as e:
@@ -330,8 +314,6 @@ def test_scheduler_deadline_missing_runtime():
output = e.output.decode('utf-8', errors='ignore') if e.output else ''
if "sched_setattr: `SCHED_DEADLINE` requires `runtime`" in output:
return 0 # Expected validation error
- if "scheduler" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("unexpected error: %s", output)
return -1
except Exception as e:
@@ -367,8 +349,6 @@ def test_scheduler_deadline_missing_deadline():
output = e.output.decode('utf-8', errors='ignore') if e.output else ''
if "sched_setattr: `SCHED_DEADLINE` requires `deadline`" in output:
return 0 # Expected validation error
- if "scheduler" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("unexpected error: %s", output)
return -1
except Exception as e:
@@ -407,8 +387,6 @@ def test_scheduler_deadline_zero_runtime():
output = e.output.decode('utf-8', errors='ignore') if e.output else ''
if "sched_setattr: `SCHED_DEADLINE` runtime must be greater than 0" in output:
return 0 # Expected validation error
- if "scheduler" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("unexpected error: %s", output)
return -1
except Exception as e:
@@ -445,8 +423,6 @@ def test_scheduler_deadline_invalid_order():
output = e.output.decode('utf-8', errors='ignore') if e.output else ''
if "sched_setattr: `SCHED_DEADLINE` runtime" in output and "must be <=" in output and "deadline" in output:
return 0 # Expected validation error
- if "scheduler" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("unexpected error: %s", output)
return -1
except Exception as e:
@@ -483,8 +459,6 @@ def test_scheduler_deadline_invalid_deadline_period():
output = e.output.decode('utf-8', errors='ignore') if e.output else ''
if "sched_setattr: `SCHED_DEADLINE` deadline" in output and "must be <=" in output and "period" in output:
return 0 # Expected validation error
- if "scheduler" in output.lower() or "deadline" in output.lower():
- return (77, "SCHED_DEADLINE not available")
logger.info("unexpected error: %s", output)
return -1
except Exception as e:
@@ -492,6 +466,72 @@ def test_scheduler_deadline_invalid_deadline_period():
return -1
+def test_scheduler_deadline_too_small_runtime():
+ """Test SCHED_DEADLINE validation - runtime < min."""
+ if is_rootless():
+ return (77, "SCHED_DEADLINE requires root")
+ if not is_sched_deadline_available():
+ return (77, "SCHED_DEADLINE not available in kernel")
+
+ conf = base_config()
+ add_all_namespaces(conf)
+
+ conf['process']['scheduler'] = {
+ 'policy': 'SCHED_DEADLINE',
+ 'runtime': 1023, # too small
+ 'deadline': 10000000, # 10ms
+ }
+
+ conf['process']['args'] = ['/init', 'true']
+
+ try:
+ out, _ = run_and_get_output(conf, hide_stderr=False)
+ # Should have failed due to too small runtime.
+ return -1
+
+ except subprocess.CalledProcessError as e:
+ output = e.output.decode('utf-8', errors='ignore') if e.output else ''
+ if "sched_setattr: `SCHED_DEADLINE` runtime " in output and " must be between " in output:
+ return 0 # Expected validation error
+ logger.info("unexpected error: %s", output)
+ return -1
+ except Exception as e:
+ logger.info("test failed: %s", e)
+ return -1
+
+
+def test_scheduler_deadline_too_big_runtime():
+ """Test SCHED_DEADLINE validation - runtime > max."""
+ if is_rootless():
+ return (77, "SCHED_DEADLINE requires root")
+ if not is_sched_deadline_available():
+ return (77, "SCHED_DEADLINE not available in kernel")
+
+ conf = base_config()
+ add_all_namespaces(conf)
+
+ conf['process']['scheduler'] = {
+ 'policy': 'SCHED_DEADLINE',
+ 'runtime': 9223372036854775809,
+ 'deadline': 9223372036854775810,
+ }
+
+ conf['process']['args'] = ['/init', 'true']
+
+ try:
+ out, _ = run_and_get_output(conf, hide_stderr=False)
+ # Should have failed due to too big runtime.
+ return -1
+
+ except subprocess.CalledProcessError as e:
+ output = e.output.decode('utf-8', errors='ignore') if e.output else ''
+ if "sched_setattr: `SCHED_DEADLINE` runtime " in output and " must be between " in output:
+ return 0 # Expected validation error
+ logger.info("unexpected error: %s", output)
+ return -1
+ except Exception as e:
+ logger.info("test failed: %s", e)
+ return -1
all_tests = {
@@ -509,6 +549,8 @@ all_tests = {
"scheduler-deadline-zero-runtime": test_scheduler_deadline_zero_runtime,
"scheduler-deadline-invalid-order": test_scheduler_deadline_invalid_order,
"scheduler-deadline-invalid-deadline-period": test_scheduler_deadline_invalid_deadline_period,
+ "scheduler-deadline-too-small-runtime": test_scheduler_deadline_too_small_runtime,
+ "scheduler-deadline-too-big-runtime": test_scheduler_deadline_too_big_runtime,
}
if __name__ == "__main__": |
|
@giuseppe I can keep probably force-push to this branch if you like, or keep working on this in any other way you see fit. |
a7265e0 to
e049820
Compare
|
@kolyshkin thanks! I've amended your changes, but if you find it easier to force push (or even open a new PR), that works for me |
|
Ephemeral COPR build failed. @containers/packit-build please check. |
|
TMT tests failed. @containers/packit-build please check. |
e049820 to
5854f43
Compare
|
Force-pushed with the following change: diff --git a/src/libcrun/scheduler.c b/src/libcrun/scheduler.c
index a229bb77..811c5819 100644
--- a/src/libcrun/scheduler.c
+++ b/src/libcrun/scheduler.c
@@ -97,7 +97,7 @@ libcrun_reset_cpu_affinity_mask (pid_t pid, libcrun_error_t *err)
}
static int
-diagnose_scheduler_failure (int ret, libcrun_error_t *err, runtime_spec_schema_config_schema_process *process,
+diagnose_scheduler_failure (libcrun_error_t *err, runtime_spec_schema_config_schema_process *process,
struct sched_attr_s *attr)
{
if (attr->sched_policy == SCHED_DEADLINE && errno == EINVAL)
@@ -222,7 +222,7 @@ libcrun_set_scheduler (pid_t pid, runtime_spec_schema_config_schema_process *pro
ret = syscall_sched_setattr (pid, &attr, 0);
if (UNLIKELY (ret < 0))
- return diagnose_scheduler_failure (ret, err, process, &attr);
+ return diagnose_scheduler_failure (err, process, &attr);
return 0;
}to fix EDIT: plus some clang-format changes |
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com> Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
5854f43 to
782ea50
Compare
|
Ephemeral COPR build failed. @containers/packit-build please check. |
|
TMT tests failed. @containers/packit-build please check. |
|
podman ci job keeps failing, will take a closer look next week. |
|
I think we should just skip "podman build and remove basic alpine with TMPDIR as relative" as it fails to mount the overlay (not the OCI runtime fault) |
No description provided.