Skip to content

Commit

Permalink
feat: suppress controller runtime first N failures on the console
Browse files Browse the repository at this point in the history
As the controllers might fail with transient errors on machine startup,
but errors are always retried, persisten errors will anyway show up in
the console.

The full `talosctl logs controller-runtime` are not suppressed.

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Jul 5, 2024
1 parent 3d35e54 commit 0454130
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,13 @@ func (ctrl *Controller) makeLogger(s string) (*zap.Logger, error) {
}

return logging.ZapLogger(
logging.NewLogDestination(logWriter, zapcore.DebugLevel, logging.WithColoredLevels()),
logging.NewLogDestination(logging.StdWriter, ctrl.consoleLogLevel, logging.WithoutTimestamp(), logging.WithoutLogLevels()),
logging.NewLogDestination(logWriter, zapcore.DebugLevel,
logging.WithColoredLevels(),
),
logging.NewLogDestination(logging.StdWriter, ctrl.consoleLogLevel,
logging.WithoutTimestamp(),
logging.WithoutLogLevels(),
logging.WithControllerErrorSuppressor(constants.ConsoleLogErrorSuppressThreshold),
),
).With(logging.Component(s)), nil
}
2 changes: 1 addition & 1 deletion internal/pkg/ntp/ntp.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) {
} else {
ips, err := net.LookupIP(server)
if err != nil {
syncer.logger.Warn(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err))
syncer.logger.Error(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err))
}

for _, ip := range ips {
Expand Down
79 changes: 79 additions & 0 deletions pkg/logging/error_suppress.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package logging

import (
"sync/atomic"

"go.uber.org/zap/zapcore"
)

// NewControllerErrorSuppressor creates a new controller error suppressor.
//
// It suppresses error logs for a given controller unless it logs >= threshold errors.
// The idea is that all controllers reconcile errors, so if the error is not transient,
// it will be reported enough time to hit the threshold, but transient errors will be
// suppressed.
//
// The suppressor records the controller name by inspecting a log field named "controller"
// passed via `logger.With()` call.
func NewControllerErrorSuppressor(core zapcore.Core, threshold int64) zapcore.Core {
return &consoleSampler{
Core: core,
threshold: threshold,
}
}

type consoleSampler struct {
zapcore.Core

hits *atomic.Int64
threshold int64
controller string
}

var _ zapcore.Core = (*consoleSampler)(nil)

func (s *consoleSampler) Level() zapcore.Level {
return zapcore.LevelOf(s.Core)
}

func (s *consoleSampler) With(fields []zapcore.Field) zapcore.Core {
controller := s.controller
num := s.hits

for _, field := range fields {
if field.Key == "controller" {
if field.String != controller {
controller = field.String
num = new(atomic.Int64)
}

break
}
}

return &consoleSampler{
threshold: s.threshold,
controller: controller,
hits: num,
Core: s.Core.With(fields),
}
}

func (s *consoleSampler) Check(ent zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry {
if !s.Enabled(ent.Level) {
return ce
}

if ent.Level == zapcore.ErrorLevel && s.controller != "" {
if s.hits.Add(1) <= s.threshold {
// suppress the log
return ce
}
}

return s.Core.Check(ent, ce)
}
67 changes: 67 additions & 0 deletions pkg/logging/error_suppress_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package logging_test

import (
"testing"

"github.com/stretchr/testify/assert"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest/observer"

"github.com/siderolabs/talos/pkg/logging"
)

func assertLogged(t *testing.T, core zapcore.Core, logs *observer.ObservedLogs, entries []zapcore.Entry, expectedCount int) {
t.Helper()

for _, entry := range entries {
if ce := core.Check(entry, nil); ce != nil {
ce.Write()
}
}

assert.Len(t, logs.TakeAll(), expectedCount)
}

func TestErrorSuppressor(t *testing.T) {
t.Parallel()

core, logs := observer.New(zapcore.InfoLevel)

const threshold = 2

core = logging.NewControllerErrorSuppressor(core, threshold)

// warn/info messages are not affected
assertLogged(t, core, logs, []zapcore.Entry{
{Level: zapcore.InfoLevel, Message: "abc"},
{Level: zapcore.WarnLevel, Message: "def"},
{Level: zapcore.DebugLevel, Message: "message"}, // below level
}, 2)

// different controllers, suppress counters are independent
controllerCore1 := core.With([]zapcore.Field{{Key: "controller", String: "c1"}})
controllerCore2 := core.With([]zapcore.Field{{Key: "controller", String: "c2"}})

assertLogged(t, controllerCore1, logs, []zapcore.Entry{
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed
{Level: zapcore.ErrorLevel, Message: "controller failed"},
}, 1)

assertLogged(t, controllerCore2, logs, []zapcore.Entry{
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed
}, 0)

assertLogged(t, controllerCore1, logs, []zapcore.Entry{
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // not suppressed, over threshold
}, 1)

assertLogged(t, controllerCore1.With([]zapcore.Field{{Key: "foo", String: "bar"}}), logs, []zapcore.Entry{
{Level: zapcore.ErrorLevel, Message: "controller failed"}, // .With() without 'controller' field keeps the counter value from the parent
}, 1)
}
58 changes: 37 additions & 21 deletions pkg/logging/zap.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,50 +62,60 @@ var StdWriter = &logWrapper{nil}
// LogDestination defines logging destination Config.
type LogDestination struct {
// Level log level.
level zapcore.LevelEnabler
writer io.Writer
config zapcore.EncoderConfig
level zapcore.LevelEnabler
writer io.Writer
config zapcore.EncoderConfig
suppressThreshold int64
}

// EncoderOption defines a log destination encoder config setter.
type EncoderOption func(config *zapcore.EncoderConfig)
// LogDestinationOption defines a log destination encoder config setter.
type LogDestinationOption func(dest *LogDestination)

// WithoutTimestamp disables timestamp.
func WithoutTimestamp() EncoderOption {
return func(config *zapcore.EncoderConfig) {
config.EncodeTime = nil
func WithoutTimestamp() LogDestinationOption {
return func(dest *LogDestination) {
dest.config.EncodeTime = nil
}
}

// WithoutLogLevels disable log level.
func WithoutLogLevels() EncoderOption {
return func(config *zapcore.EncoderConfig) {
config.EncodeLevel = nil
func WithoutLogLevels() LogDestinationOption {
return func(dest *LogDestination) {
dest.config.EncodeLevel = nil
}
}

// WithColoredLevels enables log level colored output.
func WithColoredLevels() EncoderOption {
return func(config *zapcore.EncoderConfig) {
config.EncodeLevel = zapcore.CapitalColorLevelEncoder
func WithColoredLevels() LogDestinationOption {
return func(dest *LogDestination) {
dest.config.EncodeLevel = zapcore.CapitalColorLevelEncoder
}
}

// WithControllerErrorSuppressor creates a new console log controller error suppressor.
func WithControllerErrorSuppressor(threshold int64) LogDestinationOption {
return func(dest *LogDestination) {
dest.suppressThreshold = threshold
}
}

// NewLogDestination creates new log destination.
func NewLogDestination(writer io.Writer, logLevel zapcore.LevelEnabler, options ...EncoderOption) *LogDestination {
func NewLogDestination(writer io.Writer, logLevel zapcore.LevelEnabler, options ...LogDestinationOption) *LogDestination {
config := zap.NewDevelopmentEncoderConfig()
config.ConsoleSeparator = " "
config.StacktraceKey = "error"

for _, option := range options {
option(&config)
}

return &LogDestination{
dest := &LogDestination{
level: logLevel,
config: config,
writer: writer,
}

for _, option := range options {
option(dest)
}

return dest
}

// Wrap is a simple helper to wrap io.Writer with default arguments.
Expand All @@ -122,11 +132,17 @@ func ZapLogger(dests ...*LogDestination) *zap.Logger {
}

cores := xslices.Map(dests, func(dest *LogDestination) zapcore.Core {
return zapcore.NewCore(
core := zapcore.NewCore(
zapcore.NewConsoleEncoder(dest.config),
zapcore.AddSync(dest.writer),
dest.level,
)

if dest.suppressThreshold > 0 {
core = NewControllerErrorSuppressor(core, dest.suppressThreshold)
}

return core
})

return zap.New(zapcore.NewTee(cores...))
Expand Down
3 changes: 3 additions & 0 deletions pkg/machinery/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,9 @@ const (

// MinimumGOAMD64Level is the minimum x86_64 microarchitecture level required by Talos.
MinimumGOAMD64Level = 2

// ConsoleLogErrorSuppressThreshold is the threshold for suppressing console log errors.
ConsoleLogErrorSuppressThreshold = 4
)

// See https://linux.die.net/man/3/klogctl
Expand Down

0 comments on commit 0454130

Please sign in to comment.