diff --git a/go.mod b/go.mod index 33eb78c7647..2af82aae517 100644 --- a/go.mod +++ b/go.mod @@ -107,10 +107,12 @@ require ( github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 github.com/shirou/gopsutil/v4 v4.25.4 github.com/slackhq/vitess-addons v0.22.1 + github.com/slok/noglog v0.2.0 github.com/spf13/afero v1.12.0 github.com/spf13/jwalterweatherman v1.1.0 github.com/xlab/treeprint v1.2.0 go.uber.org/goleak v1.3.0 + go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 golang.org/x/sync v0.12.0 gonum.org/v1/gonum v0.15.1 @@ -230,7 +232,6 @@ require ( go.opentelemetry.io/otel/trace v1.35.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250313205543-e70fdf4c4cb4 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect diff --git a/go.sum b/go.sum index f3ff3b31cd7..47614d997de 100644 --- a/go.sum +++ b/go.sum @@ -529,6 +529,8 @@ github.com/sjmudd/stopwatch v0.1.1 h1:x45OvxFB5OtCkjvYtzRF5fWB857Jzjjk84Oyd5C5eb github.com/sjmudd/stopwatch v0.1.1/go.mod h1:BLw0oIQJ1YLXBO/q9ufK/SgnKBVIkC2qrm6uy78Zw6U= github.com/slackhq/vitess-addons v0.22.1 h1:iyIZzhIanMtlzVA+75+qGoqGU7xXm6QWNClYsbTTcLA= github.com/slackhq/vitess-addons v0.22.1/go.mod h1:9yfuhO4dKYIACMOrtRBIeCKefa6Qlw9FHtRYV9mtqeo= +github.com/slok/noglog v0.2.0 h1:1czu4l2EoJ8L92UwdSXXa1Y+c5TIjFAFm2P+mjej95E= +github.com/slok/noglog v0.2.0/go.mod h1:TfKxwpEZPT+UA83bQ6RME146k0MM4e8mwHLf6bhcGDI= github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= diff --git a/go/flags/endtoend/mysqlctl.txt b/go/flags/endtoend/mysqlctl.txt index f803b393a76..a49527d56dc 100644 --- a/go/flags/endtoend/mysqlctl.txt +++ b/go/flags/endtoend/mysqlctl.txt @@ -88,6 +88,8 @@ Flags: --service_map strings comma separated list of services to enable (or disable if prefixed with '-') Example: grpc-queryservice --socket_file string Local unix socket file to listen on --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet_dir string The directory within the vtdataroot to store vttablet/mysql files. Defaults to being generated by the tablet uid. --tablet_uid uint32 Tablet UID. (default 41983) diff --git a/go/flags/endtoend/mysqlctld.txt b/go/flags/endtoend/mysqlctld.txt index d4bc347e882..a07dbe60229 100644 --- a/go/flags/endtoend/mysqlctld.txt +++ b/go/flags/endtoend/mysqlctld.txt @@ -118,6 +118,8 @@ Flags: --shutdown-wait-time duration How long to wait for mysqld shutdown (default 5m0s) --socket_file string Local unix socket file to listen on --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet_dir string The directory within the vtdataroot to store vttablet/mysql files. Defaults to being generated by the tablet uid. --tablet_uid uint32 Tablet UID (default 41983) diff --git a/go/flags/endtoend/vtcombo.txt b/go/flags/endtoend/vtcombo.txt index 935d946d804..49ffcdbc04f 100644 --- a/go/flags/endtoend/vtcombo.txt +++ b/go/flags/endtoend/vtcombo.txt @@ -348,6 +348,8 @@ Flags: --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) --stream_buffer_size int the number of bytes sent from vtgate for each stream call. It's recommended to keep this value in sync with vttablet's query-server-config-stream-buffer-size. (default 32768) --stream_health_buffer_size uint max streaming health entries to buffer per streaming health client (default 20) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --table_gc_lifecycle string States for a DROP TABLE garbage collection cycle. Default is 'hold,purge,evac,drop', use any subset ('drop' implicitly always included) (default "hold,purge,evac,drop") --tablet-filter-tags StringMap Specifies a comma-separated list of tablet tags (as key:value pairs) to filter the tablets to watch. diff --git a/go/flags/endtoend/vtctld.txt b/go/flags/endtoend/vtctld.txt index 402c12d9c0f..a1f16466eb9 100644 --- a/go/flags/endtoend/vtctld.txt +++ b/go/flags/endtoend/vtctld.txt @@ -137,6 +137,8 @@ Flags: --stats_drop_variables string Variables to be dropped from the list of exported variables. --stats_emit_period duration Interval between emitting stats to all registered backends (default 1m0s) --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet_dir string The directory within the vtdataroot to store vttablet/mysql files. Defaults to being generated by the tablet uid. --tablet_grpc_ca string the server ca to use to validate servers when connecting diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index 1b6b911ddae..50b12905d8d 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -205,6 +205,8 @@ Flags: --statsd_sample_rate float Sample rate for statsd metrics (default 1) --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) --stream_buffer_size int the number of bytes sent from vtgate for each stream call. It's recommended to keep this value in sync with vttablet's query-server-config-stream-buffer-size. (default 32768) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet-filter-tags StringMap Specifies a comma-separated list of tablet tags (as key:value pairs) to filter the tablets to watch. --tablet_filters strings Specifies a comma-separated list of 'keyspace|shard_name or keyrange' values to filter the tablets to watch. diff --git a/go/flags/endtoend/vtgateclienttest.txt b/go/flags/endtoend/vtgateclienttest.txt index b62a54edea8..bb1968cb053 100644 --- a/go/flags/endtoend/vtgateclienttest.txt +++ b/go/flags/endtoend/vtgateclienttest.txt @@ -67,6 +67,8 @@ Flags: --security_policy string the name of a registered security policy to use for controlling access to URLs - empty means allow all for anyone (built-in policies: deny-all, read-only) --service_map strings comma separated list of services to enable (or disable if prefixed with '-') Example: grpc-queryservice --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --v Level log level for V logs -v, --version print binary version diff --git a/go/flags/endtoend/vtorc.txt b/go/flags/endtoend/vtorc.txt index 7e2e6cfaf40..2e950419ddc 100644 --- a/go/flags/endtoend/vtorc.txt +++ b/go/flags/endtoend/vtorc.txt @@ -83,6 +83,8 @@ Flags: --stats_drop_variables string Variables to be dropped from the list of exported variables. --stats_emit_period duration Interval between emitting stats to all registered backends (default 1m0s) --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet_manager_grpc_ca string the server ca to use to validate servers when connecting --tablet_manager_grpc_cert string the cert to use to connect diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index 568c0a4a201..67a634c3ade 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -348,6 +348,8 @@ Flags: --statsd_sample_rate float Sample rate for statsd metrics (default 1) --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) --stream_health_buffer_size uint max streaming health entries to buffer per streaming health client (default 20) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-acl-config string path to table access checker config file; send SIGHUP to reload this file --table-acl-config-reload-interval duration Ticker to reload ACLs. Duration flag, format e.g.: 30s. Default: do not reload --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class diff --git a/go/flags/endtoend/vttestserver.txt b/go/flags/endtoend/vttestserver.txt index d7fd7d46ce8..991d158d5bd 100644 --- a/go/flags/endtoend/vttestserver.txt +++ b/go/flags/endtoend/vttestserver.txt @@ -128,6 +128,8 @@ Flags: --sql-max-length-errors int truncate queries in error logs to the given length (default unlimited) --sql-max-length-ui int truncate queries in debug UIs to the given length (default 512) (default 512) --stderrthreshold severityFlag logs at or above this threshold go to stderr (default 1) + --structured-log-level logLevel The minimum log level, options: debug, info, warn, error. (default info) + --structured-logging Enable json-based structured logging --table-refresh-interval int interval in milliseconds to refresh tables in status page with refreshRequired class --tablet_dir string The directory within the vtdataroot to store vttablet/mysql files. Defaults to being generated by the tablet uid. --tablet_hostname string The hostname to use for the tablet otherwise it will be derived from OS' hostname (default "localhost") diff --git a/go/vt/logutil/logger.go b/go/vt/logutil/logger.go index 46d7c0052da..85cc263dbbd 100644 --- a/go/vt/logutil/logger.go +++ b/go/vt/logutil/logger.go @@ -17,15 +17,23 @@ limitations under the License. package logutil import ( + "flag" "fmt" "io" + "os" + "path/filepath" "runtime" "slices" "strings" "sync" "time" + noglog "github.com/slok/noglog" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "vitess.io/vitess/go/protoutil" + "vitess.io/vitess/go/vt/log" logutilpb "vitess.io/vitess/go/vt/proto/logutil" ) @@ -390,3 +398,94 @@ func fileAndLine(depth int) (string, int64) { } return file, int64(line) } + +// StructuredLoggingLevel defines the log level of structured logging. +var StructuredLoggingLevel = zapcore.InfoLevel + +// newZapLoggerConfig creates a new config for a zap logger that uses RFC3339 timestamps. +func newZapLoggerConfig() zap.Config { + conf := zap.NewProductionConfig() + conf.EncoderConfig.EncodeTime = zapcore.TimeEncoderOfLayout(time.RFC3339) + conf.Level = zap.NewAtomicLevelAt(StructuredLoggingLevel) + + // use --log_dir if provided + ld := flag.Lookup("log_dir") + if ld.Value != nil && ld.Value.String() != "" { + program := filepath.Base(os.Args[0]) + conf.OutputPaths = append( + conf.OutputPaths, + filepath.Join(ld.Value.String(), program+".log"), + ) + } + + return conf +} + +// ZapLogLevelFlag implements the pflag.Value interface, for parsing a zap log level string. +type ZapLogLevelFlag zapcore.Level + +// String represents a zapcore.Level as a lowercase string. +func (z *ZapLogLevelFlag) String() string { + level := zapcore.Level(*z) + return level.String() +} + +// Set is part of the pflag.Value interface. +func (z *ZapLogLevelFlag) Set(v string) error { + level, err := zapcore.ParseLevel(v) + if err == nil { + *z = ZapLogLevelFlag(level) + } + return err +} + +// Type is part of the pflag.Value interface. +func (z *ZapLogLevelFlag) Type() string { + return "logLevel" +} + +// SetStructuredLogger in-place noglog replacement with Zap's logger. +func SetStructuredLogger(conf *zap.Config) error { + // Use the passed configuration instead of + // the default configuration. + if conf == nil { + defaultConf := newZapLoggerConfig() + conf = &defaultConf + } + + // Build configuration and generate a sugared logger. + // Skip 3 callers so we log the real caller vs the + // noglog wrapper. + l, err := conf.Build(zap.AddCallerSkip(3)) + if err != nil { + return err + } + + logger := l.Sugar() + + noglog.SetLogger(&noglog.LoggerFunc{ + DebugfFunc: func(f string, a ...interface{}) { logger.Debugf(f, a...) }, + InfofFunc: func(f string, a ...interface{}) { logger.Infof(f, a...) }, + WarnfFunc: func(f string, a ...interface{}) { logger.Warnf(f, a...) }, + ErrorfFunc: func(f string, a ...interface{}) { logger.Errorf(f, a...) }, + }) + + log.Flush = noglog.Flush + log.Info = noglog.Info + log.Infof = noglog.Infof + log.InfoDepth = noglog.InfoDepth + log.Warning = noglog.Warning + log.Warningf = noglog.Warningf + log.WarningDepth = noglog.WarningDepth + log.Error = noglog.Error + log.Errorf = noglog.Errorf + log.ErrorDepth = noglog.ErrorDepth + log.Exit = noglog.Exit + log.Exitf = noglog.Exitf + log.ExitDepth = noglog.ExitDepth + log.Fatal = noglog.Fatal + log.Fatalf = noglog.Fatalf + log.FatalDepth = noglog.FatalDepth + + return nil +} diff --git a/go/vt/logutil/logger_test.go b/go/vt/logutil/logger_test.go index ce25543da5f..a2f3f24d6a3 100644 --- a/go/vt/logutil/logger_test.go +++ b/go/vt/logutil/logger_test.go @@ -17,9 +17,18 @@ limitations under the License. package logutil import ( + "bytes" + "encoding/json" + "net/url" "testing" "time" + "github.com/stretchr/testify/assert" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + + vtlog "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/protoutil" "vitess.io/vitess/go/race" logutilpb "vitess.io/vitess/go/vt/proto/logutil" @@ -152,3 +161,106 @@ func TestTeeLogger(t *testing.T) { } } } + +// MemorySink implements zap.Sink by writing all messages to a buffer. +// It's used to capture the logs. +type MemorySink struct { + *bytes.Buffer +} + +// Implement Close and Sync as no-ops to satisfy the interface. The Write +// method is provided by the embedded buffer. +func (s *MemorySink) Close() error { return nil } +func (s *MemorySink) Sync() error { return nil } + +func SetupLoggerWithMemSink() (sink *MemorySink, err error) { + // Create a sink instance, and register it with zap for the "memory" protocol. + sink = &MemorySink{new(bytes.Buffer)} + err = zap.RegisterSink("memory", func(*url.URL) (zap.Sink, error) { + return sink, nil + }) + if err != nil { + return nil, err + } + + testLoggerConf := NewTestMemorySinkConfig() + err = SetStructuredLogger(&testLoggerConf) + return sink, err +} + +func NewTestMemorySinkConfig() zap.Config { + conf := newZapLoggerConfig() + conf.OutputPaths = []string{"memory://"} + conf.ErrorOutputPaths = []string{"memory://"} + return conf +} + +func TestStructuredLogger_Replacing_glog(t *testing.T) { + type logMsg struct { + Level string `json:"level"` + Msg string `json:"msg"` + Caller string `json:"caller"` + Stacktrace string `json:"stacktrace"` + Timestamp string `json:"ts"` + } + + type testCase struct { + name string + logLevel zapcore.Level + } + + dummyLogMessage := "testing log" + testCases := []testCase{ + {"log debug", zap.DebugLevel}, + {"log info", zap.InfoLevel}, + {"log warn", zap.WarnLevel}, + {"log error", zap.ErrorLevel}, + } + + sink, err := SetupLoggerWithMemSink() + assert.NoError(t, err) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var loggingFunc func(format string, args ...interface{}) + var expectedLevel string + var expectStacktrace bool + + switch tc.logLevel { + case zapcore.InfoLevel, zapcore.DebugLevel: + loggingFunc = vtlog.Infof + expectedLevel = "info" + case zapcore.WarnLevel: + loggingFunc = vtlog.Warningf + expectedLevel = "warn" + case zapcore.ErrorLevel: + loggingFunc = vtlog.Errorf + expectedLevel = "error" + expectStacktrace = true + } + + loggingFunc(dummyLogMessage) + + // Unmarshal the captured log. This means we're getting a struct log. + actualLog := logMsg{} + err = json.Unmarshal(sink.Bytes(), &actualLog) + assert.NoError(t, err) + // Reset the sink so that it'll contain one log per test case. + sink.Reset() + + assert.Equal(t, expectedLevel, actualLog.Level) + assert.Equal(t, dummyLogMessage, actualLog.Msg) + if expectStacktrace { + assert.NotEmpty(t, actualLog.Stacktrace) + } + + // confirm RFC3339 timestamp + assert.NotEmpty(t, actualLog.Timestamp) + _, err = time.Parse(time.RFC3339, actualLog.Timestamp) + assert.NoError(t, err) + + // confirm caller + assert.Contains(t, actualLog.Caller, "logutil/logger_test.go") + }) + } +} diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index 42ce4a9cf12..9ecf241979d 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -79,6 +79,7 @@ var ( maxStackSize = 64 * 1024 * 1024 initStartTime time.Time // time when tablet init started: for debug purposes to time how long a tablet init takes tableRefreshInterval int + useStructuredLogger bool ) type TimeoutFlags struct { @@ -107,7 +108,11 @@ func RegisterFlags() { fs.IntVar(&tableRefreshInterval, "table-refresh-interval", tableRefreshInterval, "interval in milliseconds to refresh tables in status page with refreshRequired class") // pid_file.go - fs.StringVar(&pidFile, "pid_file", pidFile, "If set, the process will write its pid to the named file, and delete it on graceful shutdown.") + fs.StringVar(&pidFile, "pid_file", pidFile, "If set, the process will write its pid to the named file, and delete it on graceful shutdown.") // Logging + + // Logging + fs.BoolVar(&useStructuredLogger, "structured-logging", useStructuredLogger, "Enable json-based structured logging") + fs.Var((*logutil.ZapLogLevelFlag)(&logutil.StructuredLoggingLevel), "structured-log-level", "The minimum log level, options: debug, info, warn, error.") }) } @@ -123,6 +128,10 @@ func RegisterFlagsWithTimeouts(tf *TimeoutFlags) { // pid_file.go fs.StringVar(&pidFile, "pid_file", pidFile, "If set, the process will write its pid to the named file, and delete it on graceful shutdown.") + // Logging + fs.BoolVar(&useStructuredLogger, "structured-logging", useStructuredLogger, "Enable json-based structured logging") + fs.Var((*logutil.ZapLogLevelFlag)(&logutil.StructuredLoggingLevel), "structured-log-level", "The minimum log level, options: debug, info, warn, error.") + timeouts = tf }) } @@ -310,6 +319,13 @@ func ParseFlags(cmd string) { os.Exit(0) } + if useStructuredLogger { + // Replace glog logger with zap logger + if err := logutil.SetStructuredLogger(nil); err != nil { + log.Exitf("error while setting the structured logger: %s", err) + } + } + args := fs.Args() if len(args) > 0 { _flag.Usage() @@ -420,6 +436,13 @@ func ParseFlagsWithArgs(cmd string) []string { os.Exit(0) } + if useStructuredLogger { + // Replace glog logger with zap logger + if err := logutil.SetStructuredLogger(nil); err != nil { + log.Exitf("error while setting the structured logger: %s", err) + } + } + args := fs.Args() if len(args) == 0 { log.Exitf("%s expected at least one positional argument", cmd) diff --git a/go/vt/servenv/servenv_unix.go b/go/vt/servenv/servenv_unix.go index 17fa85c4167..745cdad8af7 100644 --- a/go/vt/servenv/servenv_unix.go +++ b/go/vt/servenv/servenv_unix.go @@ -27,6 +27,7 @@ import ( "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/logutil" ) // Init is the first phase of the server startup. @@ -40,6 +41,13 @@ func Init() { return int64(time.Since(serverStart).Nanoseconds()) }) + if useStructuredLogger { + // Replace glog logger with zap logger + if err := logutil.SetStructuredLogger(nil); err != nil { + log.Exitf("error while setting the structured logger: %s", err) + } + } + // Ignore SIGPIPE if specified // The Go runtime catches SIGPIPE for us on all fds except stdout/stderr // See https://golang.org/pkg/os/signal/#hdr-SIGPIPE diff --git a/go/vt/servenv/servenv_windows.go b/go/vt/servenv/servenv_windows.go index bd610b1f245..4c3ee0f6318 100644 --- a/go/vt/servenv/servenv_windows.go +++ b/go/vt/servenv/servenv_windows.go @@ -18,4 +18,16 @@ limitations under the License. package servenv -func Init() {} +import ( + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/logutil" +) + +func Init() { + if useStructuredLogger { + // Replace glog logger with zap logger + if err := logutil.SetStructuredLogger(nil); err != nil { + log.Exitf("error while setting the structured logger: %s", err) + } + } +}