Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ common --repository_cache=~/.cache/bazel-repo

# Persist build artifacts across clean/workspaces
common --disk_cache=~/.cache/bazel-disk
common --experimental_disk_cache_gc_max_size=100G

# ============================================================
# Build & Test performance
Expand All @@ -36,5 +37,3 @@ test --strategy=GoCompilePkg=local
test --test_output=errors
test --test_summary=terse
test --local_test_jobs=auto


15 changes: 15 additions & 0 deletions cmd/ctrl/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ var workerCmd = &cli.Command{

// Slack notifications
cli.String("quota-check-slack-webhook-url", "Slack webhook URL for quota exceeded notifications", cli.EnvVar("UNKEY_QUOTA_CHECK_SLACK_WEBHOOK_URL")),

// Observability
cli.Bool("otel-enabled", "Enable OpenTelemetry tracing and logging",
cli.Default(false),
cli.EnvVar("UNKEY_OTEL_ENABLED")),
cli.Float("otel-trace-sampling-rate", "Sampling rate for traces (0.0 to 1.0)",
cli.Default(0.01),
cli.EnvVar("UNKEY_OTEL_TRACE_SAMPLING_RATE")),
cli.String("region", "Cloud region identifier",
cli.EnvVar("UNKEY_REGION")),
},
Action: workerAction,
}
Expand Down Expand Up @@ -188,6 +198,11 @@ func workerAction(ctx context.Context, cmd *cli.Command) error {

// Slack notifications
QuotaCheckSlackWebhookURL: cmd.String("quota-check-slack-webhook-url"),

// Observability
OtelEnabled: cmd.Bool("otel-enabled"),
OtelTraceSamplingRate: cmd.Float("otel-trace-sampling-rate"),
Region: cmd.String("region"),
}

err := config.Validate()
Expand Down
34 changes: 22 additions & 12 deletions cmd/krane/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,25 +80,35 @@ unkey run krane # Run with default configurati
cli.String("cluster-id", "ID of the cluster",
cli.Default("local"),
cli.EnvVar("UNKEY_CLUSTER_ID")),

// Observability
cli.Bool("otel-enabled", "Enable OpenTelemetry tracing and logging",
cli.Default(false),
cli.EnvVar("UNKEY_OTEL_ENABLED")),
cli.Float("otel-trace-sampling-rate", "Sampling rate for traces (0.0 to 1.0)",
cli.Default(0.01),
cli.EnvVar("UNKEY_OTEL_TRACE_SAMPLING_RATE")),
},
Action: action,
}

func action(ctx context.Context, cmd *cli.Command) error {

config := krane.Config{
Clock: nil,
Region: cmd.RequireString("region"),
InstanceID: cmd.RequireString("instance-id"),
RegistryURL: cmd.RequireString("registry-url"),
RegistryUsername: cmd.RequireString("registry-username"),
RegistryPassword: cmd.RequireString("registry-password"),
RPCPort: cmd.RequireInt("rpc-port"),
VaultURL: cmd.String("vault-url"),
VaultToken: cmd.String("vault-token"),
PrometheusPort: cmd.RequireInt("prometheus-port"),
ControlPlaneURL: cmd.RequireString("control-plane-url"),
ControlPlaneBearer: cmd.RequireString("control-plane-bearer"),
Clock: nil,
Region: cmd.RequireString("region"),
InstanceID: cmd.RequireString("instance-id"),
RegistryURL: cmd.RequireString("registry-url"),
RegistryUsername: cmd.RequireString("registry-username"),
RegistryPassword: cmd.RequireString("registry-password"),
RPCPort: cmd.RequireInt("rpc-port"),
VaultURL: cmd.String("vault-url"),
VaultToken: cmd.String("vault-token"),
PrometheusPort: cmd.RequireInt("prometheus-port"),
ControlPlaneURL: cmd.RequireString("control-plane-url"),
ControlPlaneBearer: cmd.RequireString("control-plane-bearer"),
OtelEnabled: cmd.Bool("otel-enabled"),
OtelTraceSamplingRate: cmd.Float("otel-trace-sampling-rate"),
}

// Validate configuration
Expand Down
15 changes: 15 additions & 0 deletions cmd/vault/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ var Cmd = &cli.Command{
cli.String("s3-access-key-secret", "S3 secret access key for general vault",
cli.Required(),
cli.EnvVar("UNKEY_S3_ACCESS_KEY_SECRET")),

// Observability
cli.Bool("otel-enabled", "Enable OpenTelemetry tracing and logging",
cli.Default(false),
cli.EnvVar("UNKEY_OTEL_ENABLED")),
cli.Float("otel-trace-sampling-rate", "Sampling rate for traces (0.0 to 1.0)",
cli.Default(0.01),
cli.EnvVar("UNKEY_OTEL_TRACE_SAMPLING_RATE")),
cli.String("region", "Cloud region identifier",
cli.EnvVar("UNKEY_REGION")),
},
Action: action,
}
Expand All @@ -61,6 +71,11 @@ func action(ctx context.Context, cmd *cli.Command) error {
S3AccessKeySecret: cmd.RequireString("s3-access-key-secret"),
MasterKeys: cmd.RequireStringSlice("master-keys"),
BearerToken: cmd.RequireString("bearer-token"),

// Observability
OtelEnabled: cmd.Bool("otel-enabled"),
OtelTraceSamplingRate: cmd.Float("otel-trace-sampling-rate"),
Region: cmd.String("region"),
}

err := config.Validate()
Expand Down
4 changes: 2 additions & 2 deletions dev/k8s/manifests/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ spec:
value: "10485760"
readinessProbe:
httpGet:
path: /v2/liveness
path: /health/ready
port: 7070
initialDelaySeconds: 10
periodSeconds: 5
livenessProbe:
httpGet:
path: /v2/liveness
path: /health/live
port: 7070
initialDelaySeconds: 30
periodSeconds: 10
Expand Down
12 changes: 12 additions & 0 deletions dev/k8s/manifests/ctrl-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ spec:
imagePullPolicy: Never # Use local images
ports:
- containerPort: 7091
readinessProbe:
httpGet:
path: /health/ready
port: 7091
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
httpGet:
path: /health/live
port: 7091
initialDelaySeconds: 10
periodSeconds: 10
env:
# Server Configuration
- name: UNKEY_HTTP_PORT
Expand Down
4 changes: 2 additions & 2 deletions dev/k8s/manifests/ctrl-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ spec:
- containerPort: 9080
livenessProbe:
httpGet:
path: /health
path: /health/live
port: 9080
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
path: /health/ready
port: 9080
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
15 changes: 13 additions & 2 deletions dev/k8s/manifests/frontline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,29 @@ spec:
readOnly: true
readinessProbe:
httpGet:
path: /_unkey/internal/health
path: /_unkey/internal/health/ready
port: 7070
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
httpGet:
path: /_unkey/internal/health/live
port: 7070
initialDelaySeconds: 10
periodSeconds: 10
volumes:
- name: tls-certs
configMap:
name: frontline-certs
initContainers:
- name: wait-for-dependencies
image: busybox:1.36
command: ["sh", "-c", "until nc -z mysql 3306 && nc -z ctrl-api 7091 && nc -z vault 8060; do sleep 2; done"]
command:
[
"sh",
"-c",
"until nc -z mysql 3306 && nc -z ctrl-api 7091 && nc -z vault 8060; do sleep 2; done",
]
---
apiVersion: v1
kind: Service
Expand Down
12 changes: 12 additions & 0 deletions dev/k8s/manifests/krane.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ spec:
imagePullPolicy: Never # Use local images
ports:
- containerPort: 8070
readinessProbe:
httpGet:
path: /health/ready
port: 8070
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
httpGet:
path: /health/live
port: 8070
initialDelaySeconds: 10
periodSeconds: 10
env:
# Server configuration
- name: UNKEY_REGION
Expand Down
4 changes: 2 additions & 2 deletions dev/k8s/manifests/preflight.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,14 @@ spec:
memory: 128Mi
livenessProbe:
httpGet:
path: /healthz
path: /health/live
port: 8443
scheme: HTTPS
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
path: /health/ready
port: 8443
scheme: HTTPS
initialDelaySeconds: 5
Expand Down
1 change: 0 additions & 1 deletion pkg/otel/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ go_library(
deps = [
"//pkg/otel/logging",
"//pkg/otel/tracing",
"//pkg/shutdown",
"//pkg/version",
"@com_github_shirou_gopsutil_v4//cpu",
"@com_github_shirou_gopsutil_v4//mem",
Expand Down
45 changes: 26 additions & 19 deletions pkg/otel/grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"github.com/shirou/gopsutil/v4/mem"
"github.com/unkeyed/unkey/pkg/otel/logging"
"github.com/unkeyed/unkey/pkg/otel/tracing"
"github.com/unkeyed/unkey/pkg/shutdown"
"github.com/unkeyed/unkey/pkg/version"
"go.opentelemetry.io/contrib/bridges/otelslog"
"go.opentelemetry.io/contrib/bridges/prometheus"
Expand Down Expand Up @@ -88,7 +87,7 @@ type Config struct {
// for _, err := range errs {
// log.Printf("Shutdown error: %v", err)
// }
func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdowns) error {
func InitGrafana(ctx context.Context, config Config) (func(ctx context.Context) error, error) {
// Create a resource with common attributes
res, err := resource.New(ctx,
resource.WithAttributes(
Expand All @@ -100,21 +99,19 @@ func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdow
),
)
if err != nil {
return fmt.Errorf("failed to create resource: %w", err)
return nil, fmt.Errorf("failed to create resource: %w", err)
}

// Configure OTLP log handler
logExporter, err := otlploghttp.New(ctx,
otlploghttp.WithCompression(otlploghttp.GzipCompression),
)
if err != nil {
return fmt.Errorf("failed to create log exporter: %w", err)
return nil, fmt.Errorf("failed to create log exporter: %w", err)
}
shutdowns.RegisterCtx(logExporter.Shutdown)

var processor log.Processor = log.NewBatchProcessor(logExporter, log.WithExportBufferSize(512), log.WithExportInterval(15*time.Second))
processor = minsev.NewLogProcessor(processor, minsev.SeverityInfo)
shutdowns.RegisterCtx(processor.Shutdown)

// if config.LogDebug {
// processor = minsev.NewLogProcessor(processor, minsev.SeverityDebug)
Expand All @@ -124,7 +121,6 @@ func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdow
log.WithResource(res),
log.WithProcessor(processor),
)
shutdowns.RegisterCtx(logProvider.Shutdown)

logging.AddHandler(otelslog.NewHandler(
config.Application,
Expand All @@ -140,11 +136,10 @@ func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdow
// otlptracehttp.WithInsecure(), // For local development
)
if err != nil {
return fmt.Errorf("failed to create trace exporter: %w", err)
return nil, fmt.Errorf("failed to create trace exporter: %w", err)
}

// The trace exporter is shut down by the combined shutdown function returned at the end of InitGrafana.
shutdowns.RegisterCtx(traceExporter.Shutdown)

var sampler trace.Sampler

Expand Down Expand Up @@ -174,9 +169,6 @@ func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdow
trace.WithSampler(sampler),
)

// Register shutdown function for trace provider
shutdowns.RegisterCtx(traceProvider.Shutdown)

// Set the global trace provider
otel.SetTracerProvider(traceProvider)
tracing.SetGlobalTraceProvider(traceProvider)
Expand All @@ -187,27 +179,42 @@ func InitGrafana(ctx context.Context, config Config, shutdowns *shutdown.Shutdow
// otlpmetrichttp.WithInsecure(), // For local development
)
if err != nil {
return fmt.Errorf("failed to create metric exporter: %w", err)
return nil, fmt.Errorf("failed to create metric exporter: %w", err)
}

shutdowns.RegisterCtx(metricExporter.Shutdown)

bridge := prometheus.NewMetricProducer()

reader := metricsdk.NewPeriodicReader(metricExporter, metricsdk.WithProducer(bridge), metricsdk.WithInterval(60*time.Second))
shutdowns.RegisterCtx(reader.Shutdown)

// Create and register the metric provider globally
meterProvider := metricsdk.NewMeterProvider(metricsdk.WithReader(reader), metricsdk.WithResource(res))
shutdowns.RegisterCtx(meterProvider.Shutdown)
otel.SetMeterProvider(meterProvider)

err = registerSystemMetrics(meterProvider.Meter(config.Application))
if err != nil {
return err
return nil, err
}

return nil
// Return a combined shutdown function for the caller to invoke during application termination.
// It shuts the telemetry components down in the order listed below and stops at the first error.
return func(ctx context.Context) error {
for _, fn := range []func(context.Context) error{
meterProvider.Shutdown,
reader.Shutdown,
metricExporter.Shutdown,
traceProvider.Shutdown,
traceExporter.Shutdown,
logProvider.Shutdown,
processor.Shutdown,
logExporter.Shutdown,
} {
if err := fn(ctx); err != nil {
return err
}

}
return nil

}, nil
}

func registerSystemMetrics(m metric.Meter) error {
Expand Down
12 changes: 10 additions & 2 deletions pkg/runner/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,27 @@ load("@rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "runner",
srcs = ["runner.go"],
srcs = [
"health.go",
"runner.go",
],
importpath = "github.com/unkeyed/unkey/pkg/runner",
visibility = ["//visibility:public"],
deps = ["//pkg/otel/logging"],
)

go_test(
name = "runner_test",
srcs = [
"concurrency_test.go",
"error_test.go",
"health_test.go",
"run_test.go",
"runner_test.go",
],
embed = [":runner"],
deps = ["@com_github_stretchr_testify//require"],
deps = [
"//pkg/otel/logging",
"@com_github_stretchr_testify//require",
],
)
Loading
Loading