Skip to content

Commit 3556c06

Browse files
committed
Move more rules as recording
1 parent 0bb7f34 commit 3556c06

File tree

2 files changed

+38
-41
lines changed

2 files changed

+38
-41
lines changed

api/flowcollector/v1beta2/flowcollector_defaults.go

Lines changed: 10 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -49,17 +49,11 @@ var (
4949
DefaultHealthRules = []FLPHealthRule{
5050
{
5151
Template: HealthRulePacketDropsByKernel,
52+
Mode: ModeRecording,
5253
Variants: []HealthRuleVariant{
53-
{
54-
Mode: ptr.To(ModeRecording),
55-
Thresholds: HealthRuleThresholds{
56-
Info: "10",
57-
},
58-
LowVolumeThreshold: "5",
59-
GroupBy: GroupByNamespace,
60-
},
6154
{
6255
Thresholds: HealthRuleThresholds{
56+
Info: "10",
6357
Warning: "20",
6458
},
6559
LowVolumeThreshold: "5",
@@ -68,13 +62,8 @@ var (
6862
{
6963
Mode: ptr.To(ModeRecording),
7064
Thresholds: HealthRuleThresholds{
71-
Info: "5",
72-
},
73-
GroupBy: GroupByNode,
74-
},
75-
{
76-
Thresholds: HealthRuleThresholds{
77-
Warning: "10",
65+
Info: "5",
66+
Warning: "15",
7867
},
7968
GroupBy: GroupByNode,
8069
},
@@ -85,7 +74,8 @@ var (
8574
Variants: []HealthRuleVariant{
8675
{
8776
Thresholds: HealthRuleThresholds{
88-
Warning: "5",
77+
Info: "5",
78+
Warning: "10",
8979
},
9080
GroupBy: GroupByNode,
9181
},
@@ -96,12 +86,12 @@ var (
9686
Variants: []HealthRuleVariant{
9787
{
9888
Thresholds: HealthRuleThresholds{
99-
Critical: "2",
89+
Warning: "2",
10090
},
10191
},
10292
{
10393
Thresholds: HealthRuleThresholds{
104-
Critical: "2",
94+
Warning: "2",
10595
},
10696
GroupBy: GroupByNode,
10797
},
@@ -139,16 +129,11 @@ var (
139129
},
140130
{
141131
Template: HealthRuleNetpolDenied,
132+
Mode: ModeRecording,
142133
Variants: []HealthRuleVariant{
143-
{
144-
Mode: ptr.To(ModeRecording),
145-
Thresholds: HealthRuleThresholds{
146-
Info: "5",
147-
},
148-
GroupBy: GroupByNamespace,
149-
},
150134
{
151135
Thresholds: HealthRuleThresholds{
136+
Info: "5",
152137
Warning: "10",
153138
},
154139
GroupBy: GroupByNamespace,

internal/pkg/metrics/alerts/alerts_test.go

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -98,37 +98,49 @@ func TestBuildRules_DefaultWithFeaturesAndDisabled(t *testing.T) {
9898
assert.Equal(t, []string{
9999
"netobserv:health:packet_drops_kernel:namespace:src:rate2m",
100100
"netobserv:health:packet_drops_kernel:namespace:dst:rate2m",
101-
"PacketDropsByKernel_PerSrcNamespaceWarning",
102-
"PacketDropsByKernel_PerDstNamespaceWarning",
103101
"netobserv:health:packet_drops_kernel:node:src:rate2m",
104102
"netobserv:health:packet_drops_kernel:node:dst:rate2m",
105-
"PacketDropsByKernel_PerSrcNodeWarning",
106-
"PacketDropsByKernel_PerDstNodeWarning",
107103
"PacketDropsByDevice_PerNodeWarning",
108-
"IPsecErrors_Critical",
109-
"IPsecErrors_PerSrcNodeCritical",
110-
"IPsecErrors_PerDstNodeCritical",
104+
"PacketDropsByDevice_PerNodeInfo",
105+
"IPsecErrors_Warning",
106+
"IPsecErrors_PerSrcNodeWarning",
107+
"IPsecErrors_PerDstNodeWarning",
111108
"DNSErrors_Warning",
112109
"DNSErrors_PerDstNamespaceWarning",
113110
"DNSErrors_PerDstNamespaceInfo",
114111
"netobserv:health:dns_nxdomain:namespace:dst:rate2m",
115112
"netobserv:health:netpol_denied:namespace:src:rate2m",
116113
"netobserv:health:netpol_denied:namespace:dst:rate2m",
117-
"NetpolDenied_PerSrcNamespaceWarning",
118-
"NetpolDenied_PerDstNamespaceWarning",
119114
"netobserv:health:tcp_latency_increase_p90:namespace:src:rate2m",
120115
"netobserv:health:tcp_latency_increase_p90:namespace:dst:rate2m",
121116
"netobserv:health:ingress_5xx_errors:namespace:src:rate2m",
122117
"netobserv:health:ingress_http_latency_increase_avg:namespace:src:rate2m",
123118
"NetObservNoFlows",
124119
}, allNames(rules))
125-
assert.Contains(t, rules[2].Annotations["description"], "NetObserv is detecting more than 20% of packets dropped by the kernel [source namespace={{ $labels.namespace }}]")
126-
assert.Equal(t, `{"alertThreshold":"20","unit":"%","namespaceLabels":["namespace"]}`, rules[2].Annotations["netobserv_io_network_health"])
127-
assert.Contains(t, rules[6].Annotations["description"], "NetObserv is detecting more than 10% of packets dropped by the kernel [source node={{ $labels.node }}]")
128-
assert.Equal(t, `{"alertThreshold":"10","unit":"%","nodeLabels":["node"]}`, rules[6].Annotations["netobserv_io_network_health"])
129-
assert.Contains(t, rules[8].Annotations["description"], "node-exporter is reporting more than 5% of dropped packets [node={{ $labels.instance }}]")
130-
assert.Equal(t, `{"alertThreshold":"5","unit":"%","nodeLabels":["instance"]}`, rules[8].Annotations["netobserv_io_network_health"])
131-
assert.Contains(t, rules[len(rules)-1].Annotations["description"], "NetObserv flowlogs-pipeline is not receiving any flow")
120+
r := findRule("PacketDropsByDevice_PerNodeWarning", rules)
121+
assert.NotNil(t, r)
122+
assert.Equal(t, "node-exporter is reporting more than 10% of dropped packets [node={{ $labels.instance }}].", r.Annotations["description"])
123+
assert.Equal(t, `{"alertThreshold":"10","unit":"%","nodeLabels":["instance"]}`, r.Annotations["netobserv_io_network_health"])
124+
r = findRule("IPsecErrors_Warning", rules)
125+
assert.NotNil(t, r)
126+
assert.Equal(t, "NetObserv is detecting more than 2% of IPsec errors.", r.Annotations["description"])
127+
assert.Equal(t, `{"alertThreshold":"2","unit":"%"}`, r.Annotations["netobserv_io_network_health"])
128+
r = findRule("DNSErrors_PerDstNamespaceWarning", rules)
129+
assert.NotNil(t, r)
130+
assert.Equal(t, "NetObserv is detecting more than 10% of DNS errors [dest. namespace={{ $labels.namespace }}] (other than NX_DOMAIN).", r.Annotations["description"])
131+
assert.Contains(t, r.Annotations["netobserv_io_network_health"], `{"alertThreshold":"10","unit":"%","namespaceLabels":["namespace"]`)
132+
r = findRule("NetObservNoFlows", rules)
133+
assert.NotNil(t, r)
134+
assert.Contains(t, r.Annotations["description"], "NetObserv flowlogs-pipeline is not receiving any flow")
135+
}
136+
137+
func findRule(name string, rules []monitoringv1.Rule) *monitoringv1.Rule {
138+
for i := range rules {
139+
if rules[i].Alert == name {
140+
return &rules[i]
141+
}
142+
}
143+
return nil
132144
}
133145

134146
func TestBuildRules_DefaultWithFeaturesAndAllDisabled(t *testing.T) {

0 commit comments

Comments
 (0)