diff --git a/deploy/sre-prometheus/ocm-agent/obo-monitoring/100-oidc-missing.PrometheusRule.yaml b/deploy/sre-prometheus/ocm-agent/obo-monitoring/100-oidc-missing.PrometheusRule.yaml index fdb95aafe9..ffc39cc464 100644 --- a/deploy/sre-prometheus/ocm-agent/obo-monitoring/100-oidc-missing.PrometheusRule.yaml +++ b/deploy/sre-prometheus/ocm-agent/obo-monitoring/100-oidc-missing.PrometheusRule.yaml @@ -17,10 +17,10 @@ spec: description: "Customer cloud environment is unreachable from the management cluster due to invalid aws credentials" summary: "Cluster has invalid AWS credentials" # Clusters tend to have their `hypershift_cluster_invalid_aws_creds` set to > 0 while the HCP didn't finish the installation, thus we check - # that the HCP is not rolling out in our expression (= hypershift_cluster_waiting_initial_avaibility_duration_seconds does not exist) + # that the HCP is not rolling out in our expression (= hypershift_cluster_waiting_initial_availability_duration_seconds does not exist) - # hypershift_cluster_waiting_initial_avaibility_duration_seconds stops being emitted once the HCP is rolled out + # hypershift_cluster_waiting_initial_availability_duration_seconds stops being emitted once the HCP is rolled out # This will be fixed with https://issues.redhat.com/browse/OCPBUGS-63353 - expr: (max by (exported_namespace, _id) (hypershift_cluster_invalid_aws_creds) == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_avaibility_duration_seconds or hypershift_cluster_deleting_duration_seconds) + expr: (max by (exported_namespace, _id) (hypershift_cluster_invalid_aws_creds) == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_availability_duration_seconds or hypershift_cluster_deleting_duration_seconds) for: 4m # api-ErrorBudgetBurn is our highest SLA and triggers after 5 minutes of CrashLooping kube-apiserver pods. KAS pods can CrashLoop due to missing OIDC/invalid AWS permissions. To reduce self-resolving alerts, we need the limited support to be in place before the alert triggers. 
labels: severity: warning diff --git a/hack/00-osd-managed-cluster-config-integration.yaml.tmpl b/hack/00-osd-managed-cluster-config-integration.yaml.tmpl index 12e09f062a..8da0ca4046 100644 --- a/hack/00-osd-managed-cluster-config-integration.yaml.tmpl +++ b/hack/00-osd-managed-cluster-config-integration.yaml.tmpl @@ -51086,7 +51086,7 @@ objects: cluster due to invalid aws credentials summary: Cluster has invalid AWS credentials expr: (max by (exported_namespace, _id) (hypershift_cluster_invalid_aws_creds) - == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_avaibility_duration_seconds + == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_availability_duration_seconds or hypershift_cluster_deleting_duration_seconds) for: 4m labels: diff --git a/hack/00-osd-managed-cluster-config-production.yaml.tmpl b/hack/00-osd-managed-cluster-config-production.yaml.tmpl index 12e09f062a..8da0ca4046 100644 --- a/hack/00-osd-managed-cluster-config-production.yaml.tmpl +++ b/hack/00-osd-managed-cluster-config-production.yaml.tmpl @@ -51086,7 +51086,7 @@ objects: cluster due to invalid aws credentials summary: Cluster has invalid AWS credentials expr: (max by (exported_namespace, _id) (hypershift_cluster_invalid_aws_creds) - == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_avaibility_duration_seconds + == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_availability_duration_seconds or hypershift_cluster_deleting_duration_seconds) for: 4m labels: diff --git a/hack/00-osd-managed-cluster-config-stage.yaml.tmpl b/hack/00-osd-managed-cluster-config-stage.yaml.tmpl index 12e09f062a..8da0ca4046 100644 --- a/hack/00-osd-managed-cluster-config-stage.yaml.tmpl +++ b/hack/00-osd-managed-cluster-config-stage.yaml.tmpl @@ -51086,7 +51086,7 @@ objects: cluster due to invalid aws credentials summary: Cluster has invalid AWS credentials expr: (max by (exported_namespace, _id) 
(hypershift_cluster_invalid_aws_creds) - == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_avaibility_duration_seconds + == 1) unless on (exported_namespace) (hypershift_cluster_waiting_initial_availability_duration_seconds or hypershift_cluster_deleting_duration_seconds) for: 4m labels: