Skip to content

Commit b7f7aef

Browse files
authored
Merge pull request #276 from coroot/systemd_services
improve systemd service detection and metadata collection
2 parents cb1c95b + 5922043 commit b7f7aef

File tree

8 files changed

+108
-26
lines changed

8 files changed

+108
-26
lines changed

cgroup/cgroup.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) {
162162
switch {
163163
case cgroupPath == "/init":
164164
return ContainerTypeTalosRuntime, "/talos/init", nil
165-
case prefix == "user.slice" || prefix == "init.scope":
165+
case prefix == "user.slice" || prefix == "init.scope" || prefix == "systemd":
166166
return ContainerTypeStandaloneProcess, "", nil
167167
case prefix == "docker" || (prefix == "system.slice" && len(parts) > 1 && strings.HasPrefix(parts[1], "docker-")):
168168
matches := dockerIdRegexp.FindStringSubmatch(cgroupPath)
@@ -194,6 +194,9 @@ func containerByCgroup(cgroupPath string) (ContainerType, string, error) {
194194
}
195195
return ContainerTypeTalosRuntime, path.Join("/talos/", matches[2]), nil
196196
case prefix == "system.slice" || prefix == "runtime.slice" || prefix == "reserved.slice" || prefix == "kube.slice" || prefix == "azure.slice":
197+
if strings.HasSuffix(cgroupPath, ".scope") {
198+
return ContainerTypeStandaloneProcess, "", nil
199+
}
197200
matches := systemSliceIdRegexp.FindStringSubmatch(cgroupPath)
198201
if matches == nil {
199202
return ContainerTypeUnknown, "", fmt.Errorf("invalid systemd cgroup %s", cgroupPath)

cgroup/cgroup_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,4 +205,24 @@ func TestContainerByCgroup(t *testing.T) {
205205
as.Equal(ContainerTypeStandaloneProcess, typ)
206206
as.Equal("", id)
207207
as.Nil(err)
208+
209+
typ, id, err = containerByCgroup("/systemd/system.slice")
210+
as.Equal(ContainerTypeStandaloneProcess, typ)
211+
as.Equal("", id)
212+
as.Nil(err)
213+
214+
typ, id, err = containerByCgroup("/system.slice/cri-containerd-69e8ded3c33c9d5e2b93acd74787b17a8629f74d6707bc5bb9b2e095337d0263.scope")
215+
as.Equal(ContainerTypeStandaloneProcess, typ)
216+
as.Equal("", id)
217+
as.Nil(err)
218+
219+
typ, id, err = containerByCgroup("/system.slice/run-ra2ddf9594bbf4a1986439b594f89eb0f.scope")
220+
as.Equal(ContainerTypeStandaloneProcess, typ)
221+
as.Equal("", id)
222+
as.Nil(err)
223+
224+
typ, id, err = containerByCgroup("/system.slice/docker-ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2.scope")
225+
as.Equal(ContainerTypeDocker, typ)
226+
as.Equal("ba7b10d15d16e10e3de7a2dcd408a3d971169ae303f46cfad4c5453c6326fee2", id)
227+
as.Nil(err)
208228
}

containers/container.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,16 @@ type ContainerNetwork struct {
3939
}
4040

4141
type ContainerMetadata struct {
42-
name string
43-
labels map[string]string
44-
volumes map[string]string
45-
logPath string
46-
image string
47-
logDecoder logparser.Decoder
48-
hostListens map[string][]netaddr.IPPort
49-
networks map[string]ContainerNetwork
50-
env map[string]string
51-
systemdTriggeredBy string
42+
name string
43+
labels map[string]string
44+
volumes map[string]string
45+
logPath string
46+
image string
47+
logDecoder logparser.Decoder
48+
hostListens map[string][]netaddr.IPPort
49+
networks map[string]ContainerNetwork
50+
env map[string]string
51+
systemd SystemdProperties
5252
}
5353

5454
type Delays struct {
@@ -237,8 +237,8 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
237237
c.lock.Lock()
238238
defer c.lock.Unlock()
239239

240-
if c.metadata.image != "" || c.metadata.systemdTriggeredBy != "" {
241-
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemdTriggeredBy)
240+
if c.metadata.image != "" || !c.metadata.systemd.IsEmpty() {
241+
ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemd.TriggeredBy, c.metadata.systemd.Type)
242242
}
243243

244244
ch <- counter(metrics.Restarts, float64(c.restarts))

containers/metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ var metrics = struct {
6161

6262
Ip2Fqdn *prometheus.Desc
6363
}{
64-
ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by"),
64+
ContainerInfo: metric("container_info", "Meta information about the container", "image", "systemd_triggered_by", "systemd_type"),
6565

6666
Restarts: metric("container_restarts_total", "Number of times the container was restarted"),
6767

containers/registry.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -284,14 +284,10 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
284284
if c := r.getOrCreateContainer(e.Pid); c != nil {
285285
c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, e.Timestamp, false, e.Duration)
286286
c.attachTlsUprobes(r.tracer, e.Pid)
287-
} else {
288-
klog.Infoln("TCP connection from unknown container", e)
289287
}
290288
case ebpftracer.EventTypeConnectionError:
291289
if c := r.getOrCreateContainer(e.Pid); c != nil {
292290
c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.ActualDstAddr, 0, true, e.Duration)
293-
} else {
294-
klog.Infoln("TCP connection error from unknown container", e)
295291
}
296292
case ebpftracer.EventTypeConnectionClose:
297293
if c := r.containersByPid[e.Pid]; c != nil {
@@ -377,6 +373,14 @@ func (r *Registry) getOrCreateContainer(pid uint32) *Container {
377373
r.containersByPidIgnored[pid] = &t
378374
return nil
379375
}
376+
if cg.ContainerType == cgroup.ContainerTypeSystemdService && *flags.SkipSystemdSystemServices {
377+
if md.systemd.IsSystemService() {
378+
klog.InfoS("skipping system service", "id", id, "unit", md.systemd.Unit, "type", md.systemd.Type, "triggered_by", md.systemd.TriggeredBy, "pid", pid)
379+
t := time.Now()
380+
r.containersByPidIgnored[pid] = &t
381+
return nil
382+
}
383+
}
380384

381385
if c := r.containersById[id]; c != nil {
382386
klog.Warningln("id conflict:", id)
@@ -542,7 +546,7 @@ func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) {
542546
switch cg.ContainerType {
543547
case cgroup.ContainerTypeSystemdService:
544548
md := &ContainerMetadata{}
545-
md.systemdTriggeredBy = SystemdTriggeredBy(cg.ContainerId)
549+
md.systemd = getSystemdProperties(cg.Id)
546550
return md, nil
547551
case cgroup.ContainerTypeDocker, cgroup.ContainerTypeContainerd, cgroup.ContainerTypeSandbox, cgroup.ContainerTypeCrio:
548552
default:

containers/systemd.go

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,23 @@ import (
1818
var (
1919
dbusConn *dbus.Conn
2020
dbusTimeout = time.Second
21+
22+
systemServicePrefixes = []string{
23+
"systemd-",
24+
"dbus",
25+
"getty",
26+
"system-serial",
27+
"system-getty",
28+
"serial-getty",
29+
"snapd",
30+
"packagekit",
31+
"unattended-upgrades",
32+
"multipathd",
33+
"qemu-guest-agent",
34+
"irqbalance",
35+
"networkd-dispatcher",
36+
"rpcbind",
37+
}
2138
)
2239

2340
func init() {
@@ -39,18 +56,54 @@ func init() {
3956
}
4057
}
4158

42-
func SystemdTriggeredBy(id string) string {
59+
// SystemdProperties holds the unit properties collected for a systemd service.
type SystemdProperties struct {
	// Unit is the unit name: the last "/"-separated element of the id passed
	// to getSystemdProperties.
	Unit string
	// TriggeredBy is the first entry of the unit's TriggeredBy property, if any.
	TriggeredBy string
	// Type is the value of the unit's Type property (e.g. "oneshot", "dbus").
	Type string
}

// IsEmpty reports whether neither TriggeredBy nor Type is set.
//
// Unit is not considered: getSystemdProperties sets it before querying D-Bus,
// so it can be populated even when the query fails.
func (sp SystemdProperties) IsEmpty() bool {
	return sp.TriggeredBy == "" && sp.Type == ""
}
68+
69+
func (sp SystemdProperties) IsSystemService() bool {
70+
switch sp.Type {
71+
case "oneshot", "dbus":
72+
return true
73+
}
74+
if strings.HasSuffix(sp.TriggeredBy, ".timer") {
75+
return true
76+
}
77+
for _, prefix := range systemServicePrefixes {
78+
if strings.HasPrefix(sp.Unit, prefix) {
79+
return true
80+
}
81+
}
82+
return false
83+
}
84+
85+
func getSystemdProperties(id string) SystemdProperties {
86+
props := SystemdProperties{}
4387
if dbusConn == nil {
44-
return ""
88+
return props
4589
}
4690
ctx, cancel := context.WithTimeout(context.Background(), dbusTimeout)
4791
defer cancel()
4892
parts := strings.Split(id, "/")
4993
unit := parts[len(parts)-1]
50-
if prop, _ := dbusConn.GetUnitPropertyContext(ctx, unit, "TriggeredBy"); prop != nil {
51-
if values, _ := prop.Value.Value().([]string); len(values) > 0 {
52-
return values[0]
94+
props.Unit = unit
95+
properties, err := dbusConn.GetAllPropertiesContext(ctx, unit)
96+
if err != nil {
97+
klog.Warningln("failed to get systemd properties:", err)
98+
return props
99+
}
100+
if v, ok := properties["TriggeredBy"]; ok {
101+
if values, _ := v.([]string); len(values) > 0 {
102+
props.TriggeredBy = values[0]
53103
}
54104
}
55-
return ""
105+
if v, ok := properties["Type"]; ok {
106+
props.Type, _ = v.(string)
107+
}
108+
return props
56109
}

flags/flags.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ var (
1818
ContainerAllowlist = kingpin.Flag("container-allowlist", "List of allowed containers (regex patterns)").Envar("CONTAINER_ALLOWLIST").Strings()
1919
ContainerDenylist = kingpin.Flag("container-denylist", "List of denied containers (regex patterns)").Envar("CONTAINER_DENYLIST").Strings()
2020

21+
SkipSystemdSystemServices = kingpin.Flag("skip-systemd-system-services", "Skip well-known systemd system services (apt, motd, udev, etc.)").Default("true").Envar("SKIP_SYSTEMD_SYSTEM_SERVICES").Bool()
22+
2123
ExcludeHTTPMetricsByPath = kingpin.Flag("exclude-http-requests-by-path", "Skip HTTP metrics and traces by path").Envar("EXCLUDE_HTTP_REQUESTS_BY_PATH").Strings()
2224

2325
ExternalNetworksWhitelist = kingpin.

proc/fd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func ReadFds(pid uint32) ([]Fd, error) {
3434
}
3535
dest, err := os.Readlink(path.Join(fdDir, entry.Name()))
3636
if err != nil {
37-
if os.IsNotExist(err) {
37+
if !os.IsNotExist(err) {
3838
klog.Warningf("failed to read link '%s': %s", entry.Name(), err)
3939
}
4040
continue

0 commit comments

Comments
 (0)