diff --git a/CHANGELOG.md b/CHANGELOG.md index 107a074ed8..cd4f97a2c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file. ### Changed - The `parse_parquet` Bloblang function, `parquet_decode`, `parquet_encode` processors and the `parquet` input have all been upgraded to the latest version of the underlying Parquet library. Since this underlying library is experimental it is likely that behaviour changes will result. One significant change is that encoding numerical values that are larger than the column type (`float64` into `FLOAT`, `int64` into `INT32`, etc) will no longer be automatically converted. +- The `parse_log` processor field `codec` is now deprecated. ## 4.24.0 - 2023-11-24 diff --git a/internal/batch/policy/batchconfig/config.go b/internal/batch/policy/batchconfig/config.go index 4d620950fc..011d215f2e 100644 --- a/internal/batch/policy/batchconfig/config.go +++ b/internal/batch/policy/batchconfig/config.go @@ -1,8 +1,6 @@ package batchconfig import ( - "gopkg.in/yaml.v3" - "github.com/benthosdev/benthos/v4/internal/component/processor" ) @@ -26,22 +24,6 @@ func NewConfig() Config { } } -// FromAny attempts to extract a Config from any value. -func FromAny(v any) (conf Config, err error) { - conf = NewConfig() - if pNode, ok := v.(*yaml.Node); ok { - err = pNode.Decode(&conf) - return - } - - var node yaml.Node - if err = node.Encode(v); err != nil { - return - } - err = node.Decode(&conf) - return -} - // IsNoop returns true if this batch policy configuration does nothing. 
func (p Config) IsNoop() bool { if p.ByteSize > 0 { diff --git a/internal/batch/policy/batchconfig/config_test.go b/internal/batch/policy/batchconfig/config_test.go deleted file mode 100644 index 0587ed541b..0000000000 --- a/internal/batch/policy/batchconfig/config_test.go +++ /dev/null @@ -1,124 +0,0 @@ -package batchconfig_test - -import ( - "testing" - - "gopkg.in/yaml.v3" - - "github.com/benthosdev/benthos/v4/internal/batch/policy/batchconfig" - "github.com/benthosdev/benthos/v4/internal/component/processor" - _ "github.com/benthosdev/benthos/v4/public/components/pure" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestConfigFromAny(t *testing.T) { - fromDefault := func(fn func(o *batchconfig.Config)) batchconfig.Config { - c := batchconfig.NewConfig() - fn(&c) - return c - } - - tests := []struct { - name string - input any - output batchconfig.Config - }{ - { - name: "map values", - input: map[string]any{ - "count": 11, - "period": "1s", - }, - output: fromDefault(func(o *batchconfig.Config) { - o.Count = 11 - o.Period = "1s" - }), - }, - { - name: "map values with processors", - input: map[string]any{ - "count": 12, - "processors": []any{ - map[string]any{ - "label": "fooproc", - "noop": struct{}{}, - }, - map[string]any{ - "label": "barproc", - "noop": struct{}{}, - }, - }, - }, - output: fromDefault(func(o *batchconfig.Config) { - o.Count = 12 - - fooConf := processor.NewConfig() - fooConf.Label = "fooproc" - fooConf.Type = "noop" - - barConf := processor.NewConfig() - barConf.Label = "barproc" - barConf.Type = "noop" - - o.Processors = append(o.Processors, fooConf, barConf) - }), - }, - { - name: "node values", - input: func() any { - var n yaml.Node - require.NoError(t, n.Encode(map[string]any{ - "count": 11, - "period": "1s", - })) - return &n - }(), - output: fromDefault(func(o *batchconfig.Config) { - o.Count = 11 - o.Period = "1s" - }), - }, - { - name: "map values with node processors", - input: func() 
any { - var fooProcNode, barProcNode yaml.Node - require.NoError(t, fooProcNode.Encode(map[string]any{ - "label": "fooproc", - "noop": struct{}{}, - })) - require.NoError(t, barProcNode.Encode(map[string]any{ - "label": "barproc", - "noop": struct{}{}, - })) - return map[string]any{ - "count": 12, - "processors": []any{&fooProcNode, &barProcNode}, - } - }(), - output: fromDefault(func(o *batchconfig.Config) { - o.Count = 12 - - fooConf := processor.NewConfig() - fooConf.Label = "fooproc" - fooConf.Type = "noop" - - barConf := processor.NewConfig() - barConf.Label = "barproc" - barConf.Type = "noop" - - o.Processors = append(o.Processors, fooConf, barConf) - }), - }, - } - - for _, test := range tests { - test := test - t.Run(test.name, func(t *testing.T) { - res, err := batchconfig.FromAny(test.input) - require.NoError(t, err) - assert.Equal(t, test.output, res) - }) - } -} diff --git a/internal/bundle/tracing/bundle_test.go b/internal/bundle/tracing/bundle_test.go index a46c49d0ae..22f891307b 100644 --- a/internal/bundle/tracing/bundle_test.go +++ b/internal/bundle/tracing/bundle_test.go @@ -333,7 +333,7 @@ func TestBundleOutputWithProcessorsTracing(t *testing.T) { blobConf := processor.NewConfig() blobConf.Type = "bloblang" - blobConf.Bloblang = "root = content().uppercase()" + blobConf.Plugin = "root = content().uppercase()" outConfig.Processors = append(outConfig.Processors, blobConf) mgr, err := manager.New( @@ -502,7 +502,7 @@ func TestBundleProcessorTracing(t *testing.T) { procConfig := processor.NewConfig() procConfig.Label = "foo" procConfig.Type = "bloblang" - procConfig.Bloblang = ` + procConfig.Plugin = ` let ctr = content().number() root.count = if $ctr % 2 == 0 { throw("nah %v".format($ctr)) } else { $ctr } meta bar = "new bar value" @@ -575,7 +575,7 @@ func TestBundleProcessorTracingError(t *testing.T) { procConfig := processor.NewConfig() procConfig.Label = "foo" procConfig.Type = "bloblang" - procConfig.Bloblang = `let nope` + procConfig.Plugin = 
`let nope` mgr, err := manager.New( manager.NewResourceConfig(), @@ -597,7 +597,7 @@ func TestBundleProcessorTracingDisabled(t *testing.T) { procConfig := processor.NewConfig() procConfig.Label = "foo" procConfig.Type = "bloblang" - procConfig.Bloblang = ` + procConfig.Plugin = ` let ctr = content().number() root.count = if $ctr % 2 == 0 { throw("nah %v".format($ctr)) } else { $ctr } meta bar = "new bar value" diff --git a/internal/cli/test/case_test.go b/internal/cli/test/case_test.go index 740abf1576..982ddc74bd 100644 --- a/internal/cli/test/case_test.go +++ b/internal/cli/test/case_test.go @@ -50,7 +50,7 @@ func TestCase(t *testing.T) { procConf = processor.NewConfig() procConf.Type = "bloblang" - procConf.Bloblang = `root = content().uppercase()` + procConf.Plugin = `root = content().uppercase()` if proc, err = mock.NewManager().NewProcessor(procConf); err != nil { t.Fatal(err) } @@ -58,7 +58,7 @@ func TestCase(t *testing.T) { procConf = processor.NewConfig() procConf.Type = "bloblang" - procConf.Bloblang = `root = deleted()` + procConf.Plugin = `root = deleted()` if proc, err = mock.NewManager().NewProcessor(procConf); err != nil { t.Fatal(err) } @@ -66,7 +66,7 @@ func TestCase(t *testing.T) { procConf = processor.NewConfig() procConf.Type = "bloblang" - procConf.Bloblang = `root = if batch_index() == 0 { count("batch_id") }` + procConf.Plugin = `root = if batch_index() == 0 { count("batch_id") }` if proc, err = mock.NewManager().NewProcessor(procConf); err != nil { t.Fatal(err) } @@ -277,7 +277,7 @@ func TestFileCaseInputs(t *testing.T) { procConf := processor.NewConfig() procConf.Type = "bloblang" - procConf.Bloblang = `root = "hello world " + content().string()` + procConf.Plugin = `root = "hello world " + content().string()` proc, err := mock.NewManager().NewProcessor(procConf) require.NoError(t, err) @@ -336,7 +336,7 @@ func TestFileCaseConditions(t *testing.T) { procConf := processor.NewConfig() procConf.Type = "bloblang" - procConf.Bloblang = `root = 
content().uppercase()` + procConf.Plugin = `root = content().uppercase()` proc, err := mock.NewManager().NewProcessor(procConf) require.NoError(t, err) diff --git a/internal/component/buffer/config.go b/internal/component/buffer/config.go index 95f5ffd06a..a84b91d5a0 100644 --- a/internal/component/buffer/config.go +++ b/internal/component/buffer/config.go @@ -1,6 +1,8 @@ package buffer import ( + "fmt" + yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/docs" @@ -51,3 +53,46 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeBuffer, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeBuffer, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/cache/config.go b/internal/component/cache/config.go index dbde83e7e9..f3a0e04d53 100644 --- a/internal/component/cache/config.go +++ 
b/internal/component/cache/config.go @@ -1,6 +1,8 @@ package cache import ( + "fmt" + "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/docs" @@ -55,3 +57,55 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeCache, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + conf.Label, _ = value["label"].(string) + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeCache, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + for i := 0; i < len(value.Content)-1; i += 2 { + if value.Content[i].Value == "label" { + conf.Label = value.Content[i+1].Value + break + } + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/input/config.go b/internal/component/input/config.go index 41943c4487..88b78623aa 100644 --- a/internal/component/input/config.go +++ b/internal/component/input/config.go @@ -1,6 +1,8 @@ package input import ( + "fmt" + yaml "gopkg.in/yaml.v3" 
"github.com/benthosdev/benthos/v4/internal/component/processor" @@ -64,3 +66,80 @@ func FromYAML(confStr string) (conf Config, err error) { err = yaml.Unmarshal([]byte(confStr), &conf) return } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeInput, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + conf.Label, _ = value["label"].(string) + + if procV, exists := value["processors"]; exists { + procArr, ok := procV.([]any) + if !ok { + err = fmt.Errorf("processors: unexpected value, expected array got %T", procV) + return + } + for i, pv := range procArr { + var tmpProc processor.Config + if tmpProc, err = processor.FromAny(prov, pv); err != nil { + err = fmt.Errorf("%v: %w", i, err) + return + } + conf.Processors = append(conf.Processors, tmpProc) + } + } + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeInput, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + for i := 0; i < len(value.Content)-1; i += 2 { + switch value.Content[i].Value { + case "label": + conf.Label = value.Content[i+1].Value + case "processors": + for i, n := range value.Content[i+1].Content { + var tmpProc processor.Config + if tmpProc, err = processor.FromAny(prov, n); err != nil { + err = fmt.Errorf("%v: %w", i, err) 
+ return + } + conf.Processors = append(conf.Processors, tmpProc) + } + } + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/interop/interop.go b/internal/component/interop/interop.go index e4ea4d6f22..8b1cc1ed7b 100644 --- a/internal/component/interop/interop.go +++ b/internal/component/interop/interop.go @@ -58,6 +58,16 @@ func (u *UnwrapInternalInput) Close(ctx context.Context) error { //------------------------------------------------------------------------------ +// UnwrapOwnedProcessor attempts to unwrap a public owned component into an +// internal variant. This is useful in cases where we're migrating internal +// components to use the public configuration APIs but aren't quite ready to +// move the full implementation yet. +func UnwrapOwnedProcessor(o *service.OwnedProcessor) processor.V1 { + return o.XUnwrapper().(interface { + Unwrap() processor.V1 + }).Unwrap() +} + // UnwrapInternalBatchProcessor is a no-op implementation of an internal // component that allows a public/service environment to unwrap it straight into // the needed format during construction. 
This is useful in cases where we're diff --git a/internal/component/metrics/config.go b/internal/component/metrics/config.go index 2f8ba99b49..4d7aee9f99 100644 --- a/internal/component/metrics/config.go +++ b/internal/component/metrics/config.go @@ -1,6 +1,8 @@ package metrics import ( + "fmt" + "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/docs" @@ -54,3 +56,46 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeMetrics, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeMetrics, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/output/config.go b/internal/component/output/config.go index de99bdfcd6..1e03362705 100644 --- a/internal/component/output/config.go +++ b/internal/component/output/config.go @@ -1,6 +1,8 @@ package output import ( + "fmt" + "gopkg.in/yaml.v3" 
"github.com/benthosdev/benthos/v4/internal/component/processor" @@ -64,3 +66,80 @@ func FromYAML(confStr string) (conf Config, err error) { err = yaml.Unmarshal([]byte(confStr), &conf) return } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeOutput, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + conf.Label, _ = value["label"].(string) + + if procV, exists := value["processors"]; exists { + procArr, ok := procV.([]any) + if !ok { + err = fmt.Errorf("processors: unexpected value, expected array got %T", procV) + return + } + for i, pv := range procArr { + var tmpProc processor.Config + if tmpProc, err = processor.FromAny(prov, pv); err != nil { + err = fmt.Errorf("%v: %w", i, err) + return + } + conf.Processors = append(conf.Processors, tmpProc) + } + } + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeOutput, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + for i := 0; i < len(value.Content)-1; i += 2 { + switch value.Content[i].Value { + case "label": + conf.Label = value.Content[i+1].Value + case "processors": + for i, n := range value.Content[i+1].Content { + var tmpProc processor.Config + if tmpProc, err = processor.FromAny(prov, n); err != nil { + err = fmt.Errorf("%v: %w", i, 
err) + return + } + conf.Processors = append(conf.Processors, tmpProc) + } + } + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/output/wrap_with_pipeline_test.go b/internal/component/output/wrap_with_pipeline_test.go index b895586121..eaeb54b1a4 100644 --- a/internal/component/output/wrap_with_pipeline_test.go +++ b/internal/component/output/wrap_with_pipeline_test.go @@ -125,14 +125,18 @@ func TestBasicWrapPipelinesOrdering(t *testing.T) { mockOut := &mockOutput{} - firstProc := processor.NewConfig() - firstProc.Type = "insert_part" - firstProc.InsertPart.Content = "foo" - firstProc.InsertPart.Index = 0 - - secondProc := processor.NewConfig() - secondProc.Type = "select_parts" - secondProc.SelectParts.Parts = []int{0} + firstProc, err := processor.FromYAML(` +insert_part: + content: foo + index: 0 +`) + require.NoError(t, err) + + secondProc, err := processor.FromYAML(` +select_parts: + parts: [ 0 ] +`) + require.NoError(t, err) conf := output.NewConfig() conf.Processors = append(conf.Processors, firstProc) diff --git a/internal/component/processor/config.go b/internal/component/processor/config.go index 7a21406a87..c0bdfd2a4e 100644 --- a/internal/component/processor/config.go +++ b/internal/component/processor/config.go @@ -1,6 +1,8 @@ package processor import ( + "fmt" + yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/docs" @@ -10,42 +12,9 @@ import ( // Deprecated: Do not add new components here. Instead, use the public plugin // APIs. Examples can be found in: ./internal/impl. 
type Config struct { - Label string `json:"label" yaml:"label"` - Type string `json:"type" yaml:"type"` - Bloblang string `json:"bloblang" yaml:"bloblang"` - BoundsCheck BoundsCheckConfig `json:"bounds_check" yaml:"bounds_check"` - Branch BranchConfig `json:"branch" yaml:"branch"` - Cache CacheConfig `json:"cache" yaml:"cache"` - Catch []Config `json:"catch" yaml:"catch"` - Compress CompressConfig `json:"compress" yaml:"compress"` - Decompress DecompressConfig `json:"decompress" yaml:"decompress"` - Dedupe DedupeConfig `json:"dedupe" yaml:"dedupe"` - ForEach []Config `json:"for_each" yaml:"for_each"` - Grok GrokConfig `json:"grok" yaml:"grok"` - GroupBy GroupByConfig `json:"group_by" yaml:"group_by"` - GroupByValue GroupByValueConfig `json:"group_by_value" yaml:"group_by_value"` - InsertPart InsertPartConfig `json:"insert_part" yaml:"insert_part"` - JMESPath JMESPathConfig `json:"jmespath" yaml:"jmespath"` - JQ JQConfig `json:"jq" yaml:"jq"` - JSONSchema JSONSchemaConfig `json:"json_schema" yaml:"json_schema"` - Log LogConfig `json:"log" yaml:"log"` - Metric MetricConfig `json:"metric" yaml:"metric"` - Noop struct{} `json:"noop" yaml:"noop"` - Plugin any `json:"plugin,omitempty" yaml:"plugin,omitempty"` - Parallel ParallelConfig `json:"parallel" yaml:"parallel"` - ParseLog ParseLogConfig `json:"parse_log" yaml:"parse_log"` - RateLimit RateLimitConfig `json:"rate_limit" yaml:"rate_limit"` - Resource string `json:"resource" yaml:"resource"` - SelectParts SelectPartsConfig `json:"select_parts" yaml:"select_parts"` - Sleep SleepConfig `json:"sleep" yaml:"sleep"` - Split SplitConfig `json:"split" yaml:"split"` - Subprocess SubprocessConfig `json:"subprocess" yaml:"subprocess"` - Switch SwitchConfig `json:"switch" yaml:"switch"` - SyncResponse struct{} `json:"sync_response" yaml:"sync_response"` - Try []Config `json:"try" yaml:"try"` - While WhileConfig `json:"while" yaml:"while"` - Workflow WorkflowConfig `json:"workflow" yaml:"workflow"` - XML XMLConfig `json:"xml" 
yaml:"xml"` + Label string `json:"label" yaml:"label"` + Type string `json:"type" yaml:"type"` + Plugin any `json:"plugin,omitempty" yaml:"plugin,omitempty"` } // NewConfig returns a configuration struct fully populated with default values. @@ -53,42 +22,9 @@ type Config struct { // APIs. Examples can be found in: ./internal/impl. func NewConfig() Config { return Config{ - Label: "", - Type: "bounds_check", - Bloblang: "", - BoundsCheck: NewBoundsCheckConfig(), - Branch: NewBranchConfig(), - Cache: NewCacheConfig(), - Catch: []Config{}, - Compress: NewCompressConfig(), - Decompress: NewDecompressConfig(), - Dedupe: NewDedupeConfig(), - ForEach: []Config{}, - Grok: NewGrokConfig(), - GroupBy: NewGroupByConfig(), - GroupByValue: NewGroupByValueConfig(), - InsertPart: NewInsertPartConfig(), - JMESPath: NewJMESPathConfig(), - JQ: NewJQConfig(), - JSONSchema: NewJSONSchemaConfig(), - Log: NewLogConfig(), - Metric: NewMetricConfig(), - Noop: struct{}{}, - Plugin: nil, - Parallel: NewParallelConfig(), - ParseLog: NewParseLogConfig(), - RateLimit: NewRateLimitConfig(), - Resource: "", - SelectParts: NewSelectPartsConfig(), - Sleep: NewSleepConfig(), - Split: NewSplitConfig(), - Subprocess: NewSubprocessConfig(), - Switch: NewSwitchConfig(), - SyncResponse: struct{}{}, - Try: []Config{}, - While: NewWhileConfig(), - Workflow: NewWorkflowConfig(), - XML: NewXMLConfig(), + Label: "", + Type: "bounds_check", + Plugin: nil, } } @@ -121,3 +57,61 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +// FromYAML is for old style tests. 
+func FromYAML(confStr string) (conf Config, err error) { + err = yaml.Unmarshal([]byte(confStr), &conf) + return +} + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeProcessor, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + conf.Label, _ = value["label"].(string) + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeProcessor, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + for i := 0; i < len(value.Content)-1; i += 2 { + if value.Content[i].Value == "label" { + conf.Label = value.Content[i+1].Value + break + } + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/component/processor/config_bounds_check.go b/internal/component/processor/config_bounds_check.go deleted file mode 100644 index 57d9800681..0000000000 --- a/internal/component/processor/config_bounds_check.go +++ /dev/null @@ -1,20 +0,0 @@ -package processor - -// BoundsCheckConfig contains configuration fields for the BoundsCheck -// processor. 
-type BoundsCheckConfig struct { - MaxParts int `json:"max_parts" yaml:"max_parts"` - MinParts int `json:"min_parts" yaml:"min_parts"` - MaxPartSize int `json:"max_part_size" yaml:"max_part_size"` - MinPartSize int `json:"min_part_size" yaml:"min_part_size"` -} - -// NewBoundsCheckConfig returns a BoundsCheckConfig with default values. -func NewBoundsCheckConfig() BoundsCheckConfig { - return BoundsCheckConfig{ - MaxParts: 100, - MinParts: 1, - MaxPartSize: 1 * 1024 * 1024 * 1024, // 1GB - MinPartSize: 1, - } -} diff --git a/internal/component/processor/config_branch.go b/internal/component/processor/config_branch.go deleted file mode 100644 index e6caadd887..0000000000 --- a/internal/component/processor/config_branch.go +++ /dev/null @@ -1,17 +0,0 @@ -package processor - -// BranchConfig contains configuration fields for the Branch processor. -type BranchConfig struct { - RequestMap string `json:"request_map" yaml:"request_map"` - Processors []Config `json:"processors" yaml:"processors"` - ResultMap string `json:"result_map" yaml:"result_map"` -} - -// NewBranchConfig returns a BranchConfig with default values. -func NewBranchConfig() BranchConfig { - return BranchConfig{ - RequestMap: "", - Processors: []Config{}, - ResultMap: "", - } -} diff --git a/internal/component/processor/config_cache.go b/internal/component/processor/config_cache.go deleted file mode 100644 index ff62e94eb7..0000000000 --- a/internal/component/processor/config_cache.go +++ /dev/null @@ -1,21 +0,0 @@ -package processor - -// CacheConfig contains configuration fields for the Cache processor. -type CacheConfig struct { - Resource string `json:"resource" yaml:"resource"` - Operator string `json:"operator" yaml:"operator"` - Key string `json:"key" yaml:"key"` - Value string `json:"value" yaml:"value"` - TTL string `json:"ttl" yaml:"ttl"` -} - -// NewCacheConfig returns a CacheConfig with default values. 
-func NewCacheConfig() CacheConfig { - return CacheConfig{ - Resource: "", - Operator: "", - Key: "", - Value: "", - TTL: "", - } -} diff --git a/internal/component/processor/config_compress.go b/internal/component/processor/config_compress.go deleted file mode 100644 index fd31eaddfe..0000000000 --- a/internal/component/processor/config_compress.go +++ /dev/null @@ -1,15 +0,0 @@ -package processor - -// CompressConfig contains configuration fields for the Compress processor. -type CompressConfig struct { - Algorithm string `json:"algorithm" yaml:"algorithm"` - Level int `json:"level" yaml:"level"` -} - -// NewCompressConfig returns a CompressConfig with default values. -func NewCompressConfig() CompressConfig { - return CompressConfig{ - Algorithm: "", - Level: -1, - } -} diff --git a/internal/component/processor/config_decompress.go b/internal/component/processor/config_decompress.go deleted file mode 100644 index ac80417ed4..0000000000 --- a/internal/component/processor/config_decompress.go +++ /dev/null @@ -1,13 +0,0 @@ -package processor - -// DecompressConfig contains configuration fields for the Decompress processor. -type DecompressConfig struct { - Algorithm string `json:"algorithm" yaml:"algorithm"` -} - -// NewDecompressConfig returns a DecompressConfig with default values. -func NewDecompressConfig() DecompressConfig { - return DecompressConfig{ - Algorithm: "", - } -} diff --git a/internal/component/processor/config_dedupe.go b/internal/component/processor/config_dedupe.go deleted file mode 100644 index e916133ef4..0000000000 --- a/internal/component/processor/config_dedupe.go +++ /dev/null @@ -1,17 +0,0 @@ -package processor - -// DedupeConfig contains configuration fields for the Dedupe processor. -type DedupeConfig struct { - Cache string `json:"cache" yaml:"cache"` - Key string `json:"key" yaml:"key"` - DropOnCacheErr bool `json:"drop_on_err" yaml:"drop_on_err"` -} - -// NewDedupeConfig returns a DedupeConfig with default values. 
-func NewDedupeConfig() DedupeConfig { - return DedupeConfig{ - Cache: "", - Key: "", - DropOnCacheErr: true, - } -} diff --git a/internal/component/processor/config_grok.go b/internal/component/processor/config_grok.go deleted file mode 100644 index 16836beceb..0000000000 --- a/internal/component/processor/config_grok.go +++ /dev/null @@ -1,23 +0,0 @@ -package processor - -// GrokConfig contains configuration fields for the Grok processor. -type GrokConfig struct { - Expressions []string `json:"expressions" yaml:"expressions"` - RemoveEmpty bool `json:"remove_empty_values" yaml:"remove_empty_values"` - NamedOnly bool `json:"named_captures_only" yaml:"named_captures_only"` - UseDefaults bool `json:"use_default_patterns" yaml:"use_default_patterns"` - PatternPaths []string `json:"pattern_paths" yaml:"pattern_paths"` - PatternDefinitions map[string]string `json:"pattern_definitions" yaml:"pattern_definitions"` -} - -// NewGrokConfig returns a GrokConfig with default values. -func NewGrokConfig() GrokConfig { - return GrokConfig{ - Expressions: []string{}, - RemoveEmpty: true, - NamedOnly: true, - UseDefaults: true, - PatternPaths: []string{}, - PatternDefinitions: make(map[string]string), - } -} diff --git a/internal/component/processor/config_group_by.go b/internal/component/processor/config_group_by.go deleted file mode 100644 index 7e64bcafe1..0000000000 --- a/internal/component/processor/config_group_by.go +++ /dev/null @@ -1,18 +0,0 @@ -package processor - -// GroupByElement represents a group determined by a condition and a list of -// group specific processors. -type GroupByElement struct { - Check string `json:"check" yaml:"check"` - Processors []Config `json:"processors" yaml:"processors"` -} - -// GroupByConfig is a configuration struct containing fields for the GroupBy -// processor, which breaks message batches down into N batches of a smaller size -// according to conditions. 
-type GroupByConfig []GroupByElement - -// NewGroupByConfig returns a GroupByConfig with default values. -func NewGroupByConfig() GroupByConfig { - return GroupByConfig{} -} diff --git a/internal/component/processor/config_group_by_value.go b/internal/component/processor/config_group_by_value.go deleted file mode 100644 index cefe29e50b..0000000000 --- a/internal/component/processor/config_group_by_value.go +++ /dev/null @@ -1,16 +0,0 @@ -package processor - -// GroupByValueConfig is a configuration struct containing fields for the -// GroupByValue processor, which breaks message batches down into N batches of a -// smaller size according to a function interpolated string evaluated per -// message part. -type GroupByValueConfig struct { - Value string `json:"value" yaml:"value"` -} - -// NewGroupByValueConfig returns a GroupByValueConfig with default values. -func NewGroupByValueConfig() GroupByValueConfig { - return GroupByValueConfig{ - Value: "", - } -} diff --git a/internal/component/processor/config_insert_part.go b/internal/component/processor/config_insert_part.go deleted file mode 100644 index e60c8b91eb..0000000000 --- a/internal/component/processor/config_insert_part.go +++ /dev/null @@ -1,15 +0,0 @@ -package processor - -// InsertPartConfig contains configuration fields for the InsertPart processor. -type InsertPartConfig struct { - Index int `json:"index" yaml:"index"` - Content string `json:"content" yaml:"content"` -} - -// NewInsertPartConfig returns a InsertPartConfig with default values. -func NewInsertPartConfig() InsertPartConfig { - return InsertPartConfig{ - Index: -1, - Content: "", - } -} diff --git a/internal/component/processor/config_jmespath.go b/internal/component/processor/config_jmespath.go deleted file mode 100644 index c78cce0f03..0000000000 --- a/internal/component/processor/config_jmespath.go +++ /dev/null @@ -1,13 +0,0 @@ -package processor - -// JMESPathConfig contains configuration fields for the JMESPath processor. 
-type JMESPathConfig struct { - Query string `json:"query" yaml:"query"` -} - -// NewJMESPathConfig returns a JMESPathConfig with default values. -func NewJMESPathConfig() JMESPathConfig { - return JMESPathConfig{ - Query: "", - } -} diff --git a/internal/component/processor/config_jq.go b/internal/component/processor/config_jq.go deleted file mode 100644 index 57988d36c0..0000000000 --- a/internal/component/processor/config_jq.go +++ /dev/null @@ -1,15 +0,0 @@ -package processor - -// JQConfig contains configuration fields for the JQ processor. -type JQConfig struct { - Query string `json:"query" yaml:"query"` - Raw bool `json:"raw" yaml:"raw"` - OutputRaw bool `json:"output_raw" yaml:"output_raw"` -} - -// NewJQConfig returns a JQConfig with default values. -func NewJQConfig() JQConfig { - return JQConfig{ - Query: "", - } -} diff --git a/internal/component/processor/config_jsonschema.go b/internal/component/processor/config_jsonschema.go deleted file mode 100644 index e68bc2a140..0000000000 --- a/internal/component/processor/config_jsonschema.go +++ /dev/null @@ -1,16 +0,0 @@ -package processor - -// JSONSchemaConfig is a configuration struct containing fields for the -// jsonschema processor. -type JSONSchemaConfig struct { - SchemaPath string `json:"schema_path" yaml:"schema_path"` - Schema string `json:"schema" yaml:"schema"` -} - -// NewJSONSchemaConfig returns a JSONSchemaConfig with default values. -func NewJSONSchemaConfig() JSONSchemaConfig { - return JSONSchemaConfig{ - SchemaPath: "", - Schema: "", - } -} diff --git a/internal/component/processor/config_log.go b/internal/component/processor/config_log.go deleted file mode 100644 index 9aa279d093..0000000000 --- a/internal/component/processor/config_log.go +++ /dev/null @@ -1,19 +0,0 @@ -package processor - -// LogConfig contains configuration fields for the Log processor. 
-type LogConfig struct { - Level string `json:"level" yaml:"level"` - Fields map[string]string `json:"fields" yaml:"fields"` - FieldsMapping string `json:"fields_mapping" yaml:"fields_mapping"` - Message string `json:"message" yaml:"message"` -} - -// NewLogConfig returns a LogConfig with default values. -func NewLogConfig() LogConfig { - return LogConfig{ - Level: "INFO", - Fields: map[string]string{}, - FieldsMapping: "", - Message: "", - } -} diff --git a/internal/component/processor/config_metric.go b/internal/component/processor/config_metric.go deleted file mode 100644 index f520ab1ec1..0000000000 --- a/internal/component/processor/config_metric.go +++ /dev/null @@ -1,19 +0,0 @@ -package processor - -// MetricConfig contains configuration fields for the Metric processor. -type MetricConfig struct { - Type string `json:"type" yaml:"type"` - Name string `json:"name" yaml:"name"` - Labels map[string]string `json:"labels" yaml:"labels"` - Value string `json:"value" yaml:"value"` -} - -// NewMetricConfig returns a MetricConfig with default values. -func NewMetricConfig() MetricConfig { - return MetricConfig{ - Type: "", - Name: "", - Labels: map[string]string{}, - Value: "", - } -} diff --git a/internal/component/processor/config_parallel.go b/internal/component/processor/config_parallel.go deleted file mode 100644 index 64516e1733..0000000000 --- a/internal/component/processor/config_parallel.go +++ /dev/null @@ -1,16 +0,0 @@ -package processor - -// ParallelConfig is a config struct containing fields for the Parallel -// processor. -type ParallelConfig struct { - Cap int `json:"cap" yaml:"cap"` - Processors []Config `json:"processors" yaml:"processors"` -} - -// NewParallelConfig returns a default ParallelConfig. 
-func NewParallelConfig() ParallelConfig { - return ParallelConfig{ - Cap: 0, - Processors: []Config{}, - } -} diff --git a/internal/component/processor/config_parse_log.go b/internal/component/processor/config_parse_log.go deleted file mode 100644 index 4073a8faa1..0000000000 --- a/internal/component/processor/config_parse_log.go +++ /dev/null @@ -1,24 +0,0 @@ -package processor - -// ParseLogConfig contains configuration fields for the ParseLog processor. -type ParseLogConfig struct { - Format string `json:"format" yaml:"format"` - Codec string `json:"codec" yaml:"codec"` - BestEffort bool `json:"best_effort" yaml:"best_effort"` - WithRFC3339 bool `json:"allow_rfc3339" yaml:"allow_rfc3339"` - WithYear string `json:"default_year" yaml:"default_year"` - WithTimezone string `json:"default_timezone" yaml:"default_timezone"` -} - -// NewParseLogConfig returns a ParseLogConfig with default values. -func NewParseLogConfig() ParseLogConfig { - return ParseLogConfig{ - Format: "", - Codec: "", - - BestEffort: true, - WithRFC3339: true, - WithYear: "current", - WithTimezone: "UTC", - } -} diff --git a/internal/component/processor/config_rate_limit.go b/internal/component/processor/config_rate_limit.go deleted file mode 100644 index d5e3f77e17..0000000000 --- a/internal/component/processor/config_rate_limit.go +++ /dev/null @@ -1,13 +0,0 @@ -package processor - -// RateLimitConfig contains configuration fields for the RateLimit processor. -type RateLimitConfig struct { - Resource string `json:"resource" yaml:"resource"` -} - -// NewRateLimitConfig returns a RateLimitConfig with default values. 
-func NewRateLimitConfig() RateLimitConfig { - return RateLimitConfig{ - Resource: "", - } -} diff --git a/internal/component/processor/config_select_parts.go b/internal/component/processor/config_select_parts.go deleted file mode 100644 index 69de031da5..0000000000 --- a/internal/component/processor/config_select_parts.go +++ /dev/null @@ -1,14 +0,0 @@ -package processor - -// SelectPartsConfig contains configuration fields for the SelectParts -// processor. -type SelectPartsConfig struct { - Parts []int `json:"parts" yaml:"parts"` -} - -// NewSelectPartsConfig returns a SelectPartsConfig with default values. -func NewSelectPartsConfig() SelectPartsConfig { - return SelectPartsConfig{ - Parts: []int{}, - } -} diff --git a/internal/component/processor/config_sleep.go b/internal/component/processor/config_sleep.go deleted file mode 100644 index cd0e6466e2..0000000000 --- a/internal/component/processor/config_sleep.go +++ /dev/null @@ -1,13 +0,0 @@ -package processor - -// SleepConfig contains configuration fields for the Sleep processor. -type SleepConfig struct { - Duration string `json:"duration" yaml:"duration"` -} - -// NewSleepConfig returns a SleepConfig with default values. -func NewSleepConfig() SleepConfig { - return SleepConfig{ - Duration: "", - } -} diff --git a/internal/component/processor/config_split.go b/internal/component/processor/config_split.go deleted file mode 100644 index fd9b80c387..0000000000 --- a/internal/component/processor/config_split.go +++ /dev/null @@ -1,16 +0,0 @@ -package processor - -// SplitConfig is a configuration struct containing fields for the Split -// processor, which breaks message batches down into batches of a smaller size. -type SplitConfig struct { - Size int `json:"size" yaml:"size"` - ByteSize int `json:"byte_size" yaml:"byte_size"` -} - -// NewSplitConfig returns a SplitConfig with default values. 
-func NewSplitConfig() SplitConfig { - return SplitConfig{ - Size: 1, - ByteSize: 0, - } -} diff --git a/internal/component/processor/config_subprocess.go b/internal/component/processor/config_subprocess.go deleted file mode 100644 index 4ff83745fb..0000000000 --- a/internal/component/processor/config_subprocess.go +++ /dev/null @@ -1,25 +0,0 @@ -package processor - -import ( - "bufio" -) - -// SubprocessConfig contains configuration fields for the Subprocess processor. -type SubprocessConfig struct { - Name string `json:"name" yaml:"name"` - Args []string `json:"args" yaml:"args"` - MaxBuffer int `json:"max_buffer" yaml:"max_buffer"` - CodecSend string `json:"codec_send" yaml:"codec_send"` - CodecRecv string `json:"codec_recv" yaml:"codec_recv"` -} - -// NewSubprocessConfig returns a SubprocessConfig with default values. -func NewSubprocessConfig() SubprocessConfig { - return SubprocessConfig{ - Name: "", - Args: []string{}, - MaxBuffer: bufio.MaxScanTokenSize, - CodecSend: "lines", - CodecRecv: "lines", - } -} diff --git a/internal/component/processor/config_switch.go b/internal/component/processor/config_switch.go deleted file mode 100644 index 01449a61f4..0000000000 --- a/internal/component/processor/config_switch.go +++ /dev/null @@ -1,58 +0,0 @@ -package processor - -import ( - "encoding/json" -) - -// SwitchCaseConfig contains a condition, processors and other fields for an -// individual case in the Switch processor. -type SwitchCaseConfig struct { - Check string `json:"check" yaml:"check"` - Processors []Config `json:"processors" yaml:"processors"` - Fallthrough bool `json:"fallthrough" yaml:"fallthrough"` -} - -// NewSwitchCaseConfig returns a new SwitchCaseConfig with default values. -func NewSwitchCaseConfig() SwitchCaseConfig { - return SwitchCaseConfig{ - Check: "", - Processors: []Config{}, - Fallthrough: false, - } -} - -// UnmarshalJSON ensures that when parsing configs that are in a map or slice -// the default values are still applied. 
-func (s *SwitchCaseConfig) UnmarshalJSON(bytes []byte) error { - type confAlias SwitchCaseConfig - aliased := confAlias(NewSwitchCaseConfig()) - - if err := json.Unmarshal(bytes, &aliased); err != nil { - return err - } - - *s = SwitchCaseConfig(aliased) - return nil -} - -// UnmarshalYAML ensures that when parsing configs that are in a map or slice -// the default values are still applied. -func (s *SwitchCaseConfig) UnmarshalYAML(unmarshal func(any) error) error { - type confAlias SwitchCaseConfig - aliased := confAlias(NewSwitchCaseConfig()) - - if err := unmarshal(&aliased); err != nil { - return err - } - - *s = SwitchCaseConfig(aliased) - return nil -} - -// SwitchConfig is a config struct containing fields for the Switch processor. -type SwitchConfig []SwitchCaseConfig - -// NewSwitchConfig returns a default SwitchConfig. -func NewSwitchConfig() SwitchConfig { - return SwitchConfig{} -} diff --git a/internal/component/processor/config_while.go b/internal/component/processor/config_while.go deleted file mode 100644 index 61f5a572d0..0000000000 --- a/internal/component/processor/config_while.go +++ /dev/null @@ -1,20 +0,0 @@ -package processor - -// WhileConfig is a config struct containing fields for the While -// processor. -type WhileConfig struct { - AtLeastOnce bool `json:"at_least_once" yaml:"at_least_once"` - MaxLoops int `json:"max_loops" yaml:"max_loops"` - Check string `json:"check" yaml:"check"` - Processors []Config `json:"processors" yaml:"processors"` -} - -// NewWhileConfig returns a default WhileConfig. 
-func NewWhileConfig() WhileConfig { - return WhileConfig{ - AtLeastOnce: false, - MaxLoops: 0, - Check: "", - Processors: []Config{}, - } -} diff --git a/internal/component/processor/config_workflow.go b/internal/component/processor/config_workflow.go deleted file mode 100644 index 82ece5aecb..0000000000 --- a/internal/component/processor/config_workflow.go +++ /dev/null @@ -1,20 +0,0 @@ -package processor - -// WorkflowConfig is a config struct containing fields for the Workflow -// processor. -type WorkflowConfig struct { - MetaPath string `json:"meta_path" yaml:"meta_path"` - Order [][]string `json:"order" yaml:"order"` - BranchResources []string `json:"branch_resources" yaml:"branch_resources"` - Branches map[string]BranchConfig `json:"branches" yaml:"branches"` -} - -// NewWorkflowConfig returns a default WorkflowConfig. -func NewWorkflowConfig() WorkflowConfig { - return WorkflowConfig{ - MetaPath: "meta.workflow", - Order: [][]string{}, - BranchResources: []string{}, - Branches: map[string]BranchConfig{}, - } -} diff --git a/internal/component/processor/config_xml.go b/internal/component/processor/config_xml.go deleted file mode 100644 index 7881776555..0000000000 --- a/internal/component/processor/config_xml.go +++ /dev/null @@ -1,15 +0,0 @@ -package processor - -// XMLConfig contains configuration fields for the XML processor. -type XMLConfig struct { - Operator string `json:"operator" yaml:"operator"` - Cast bool `json:"cast" yaml:"cast"` -} - -// NewXMLConfig returns a XMLConfig with default values. 
-func NewXMLConfig() XMLConfig { - return XMLConfig{ - Operator: "", - Cast: false, - } -} diff --git a/internal/component/ratelimit/config.go b/internal/component/ratelimit/config.go index 1da69e78d8..748d1a6e87 100644 --- a/internal/component/ratelimit/config.go +++ b/internal/component/ratelimit/config.go @@ -1,6 +1,8 @@ package ratelimit import ( + "fmt" + yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/docs" @@ -57,3 +59,55 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeRateLimit, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + conf.Label, _ = value["label"].(string) + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeRateLimit, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + for i := 0; i < len(value.Content)-1; i += 2 { + if value.Content[i].Value == "label" { + conf.Label = value.Content[i+1].Value + break + } + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git 
a/internal/component/scanner/config.go b/internal/component/scanner/config.go index bbdee5700e..e7a5629569 100644 --- a/internal/component/scanner/config.go +++ b/internal/component/scanner/config.go @@ -13,21 +13,21 @@ type Config struct { Plugin any } -func FromAny(prov docs.Provider, cType docs.Type, value any) (conf Config, err error) { +func FromAny(prov docs.Provider, value any) (conf Config, err error) { switch t := value.(type) { case Config: return t, nil case *yaml.Node: - return fromYAML(prov, cType, t) + return fromYAML(prov, t) case map[string]any: - return fromMap(prov, cType, t) + return fromMap(prov, t) } err = fmt.Errorf("unexpected value, expected object, got %T", value) return } -func fromMap(prov docs.Provider, cType docs.Type, value map[string]any) (conf Config, err error) { - if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, cType, value); err != nil { +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeScanner, value); err != nil { err = docs.NewLintError(0, docs.LintComponentNotFound, err) return } @@ -40,8 +40,8 @@ func fromMap(prov docs.Provider, cType docs.Type, value map[string]any) (conf Co return } -func fromYAML(prov docs.Provider, cType docs.Type, value *yaml.Node) (conf Config, err error) { - if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, cType, value); err != nil { +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeScanner, value); err != nil { err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) return } diff --git a/internal/component/tracer/config.go b/internal/component/tracer/config.go index a25353caf6..d810cc9179 100644 --- a/internal/component/tracer/config.go +++ b/internal/component/tracer/config.go @@ -1,6 +1,8 @@ package tracer import ( + "fmt" + 
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" yaml "gopkg.in/yaml.v3" @@ -58,3 +60,46 @@ func (conf *Config) UnmarshalYAML(value *yaml.Node) error { *conf = Config(aliased) return nil } + +func FromAny(prov docs.Provider, value any) (conf Config, err error) { + switch t := value.(type) { + case Config: + return t, nil + case *yaml.Node: + return fromYAML(prov, t) + case map[string]any: + return fromMap(prov, t) + } + err = fmt.Errorf("unexpected value, expected object, got %T", value) + return +} + +func fromMap(prov docs.Provider, value map[string]any) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromMap(prov, docs.TypeTracer, value); err != nil { + err = docs.NewLintError(0, docs.LintComponentNotFound, err) + return + } + + if p, exists := value[conf.Type]; exists { + conf.Plugin = p + } else if p, exists := value["plugin"]; exists { + conf.Plugin = p + } + return +} + +func fromYAML(prov docs.Provider, value *yaml.Node) (conf Config, err error) { + if conf.Type, _, err = docs.GetInferenceCandidateFromYAML(prov, docs.TypeTracer, value); err != nil { + err = docs.NewLintError(value.Line, docs.LintComponentNotFound, err) + return + } + + pluginNode, err := docs.GetPluginConfigYAML(conf.Type, value) + if err != nil { + err = docs.NewLintError(value.Line, docs.LintFailedRead, err) + return + } + + conf.Plugin = &pluginNode + return +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 89ba132c2c..b6f34c5ddc 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -13,6 +13,7 @@ import ( "github.com/benthosdev/benthos/v4/internal/config" "github.com/benthosdev/benthos/v4/internal/docs" + _ "github.com/benthosdev/benthos/v4/public/components/io" _ "github.com/benthosdev/benthos/v4/public/components/pure" ) diff --git a/internal/config/stream_reader_test.go b/internal/config/stream_reader_test.go index 41c757cbd9..0e8148764f 100644 --- 
a/internal/config/stream_reader_test.go +++ b/internal/config/stream_reader_test.go @@ -106,7 +106,7 @@ pipeline: require.Contains(t, streamConfs, "inner_second") require.Contains(t, streamConfs, "inner_third") - assert.Equal(t, `root = "first"`, streamConfs["first"].Pipeline.Processors[0].Bloblang) - assert.Equal(t, `root = "second"`, streamConfs["inner_second"].Pipeline.Processors[0].Bloblang) - assert.Equal(t, `root = "third"`, streamConfs["inner_third"].Pipeline.Processors[0].Bloblang) + assert.Equal(t, `root = "first"`, gabs.Wrap(testConfToAny(t, streamConfs["first"])).S("pipeline", "processors", "0", "bloblang").Data()) + assert.Equal(t, `root = "second"`, gabs.Wrap(testConfToAny(t, streamConfs["inner_second"])).S("pipeline", "processors", "0", "bloblang").Data()) + assert.Equal(t, `root = "third"`, gabs.Wrap(testConfToAny(t, streamConfs["inner_third"])).S("pipeline", "processors", "0", "bloblang").Data()) } diff --git a/internal/docs/field.go b/internal/docs/field.go index a3cd8a777b..8e00223478 100644 --- a/internal/docs/field.go +++ b/internal/docs/field.go @@ -335,6 +335,12 @@ func lintsFromAny(line int, v any) (lints []Lint) { // binary that defines it as the function cannot be serialized into a portable // schema. 
func (f FieldSpec) LinterBlobl(blobl string) FieldSpec { + if blobl == "" { + f.Linter = blobl + f.customLintFn = nil + return f + } + env := bloblang.NewEnvironment().OnlyPure() m, err := env.Parse(blobl) diff --git a/internal/impl/io/processor_subprocess.go b/internal/impl/io/processor_subprocess.go index 5e6f1f3f34..2c75e511e9 100644 --- a/internal/impl/io/processor_subprocess.go +++ b/internal/impl/io/processor_subprocess.go @@ -16,39 +16,47 @@ import ( "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" "github.com/benthosdev/benthos/v4/internal/shutdown" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newSubprocess(conf.Subprocess, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("subprocess", p, mgr), nil - }, docs.ComponentSpec{ - Name: "subprocess", - Categories: []string{ - "Integration", - }, - Summary: ` -Executes a command as a subprocess and, for each message, will pipe its contents to the stdin stream of the process followed by a newline.`, - Description: ` +const ( + spFieldName = "name" + spFieldArgs = "args" + spFieldMaxBuffer = "max_buffer" + spFieldCodecSend = "codec_send" + spFieldCodecRecv = "codec_recv" +) + +type subprocConfig struct { + Name string + Args []string + MaxBuffer int + CodecSend string + CodecRecv string +} + +func subProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Integration"). + Stable(). 
+ Summary("Executes a command as a subprocess and, for each message, will pipe its contents to the stdin stream of the process followed by a newline."). + Description(` :::info -This processor keeps the subprocess alive and requires very specific behaviour from the command executed. If you wish to simply execute a command for each message take a look at the [` + "`command`" + ` processor](/docs/components/processors/command) instead. +This processor keeps the subprocess alive and requires very specific behaviour from the command executed. If you wish to simply execute a command for each message take a look at the [`+"`command`"+` processor](/docs/components/processors/command) instead. ::: The subprocess must then either return a line over stdout or stderr. If a response is returned over stdout then its contents will replace the message. If a response is instead returned from stderr it will be logged and the message will continue unchanged and will be [marked as failed](/docs/configuration/error_handling). -Rather than separating data by a newline it's possible to specify alternative ` + "[`codec_send`](#codec_send) and [`codec_recv`](#codec_recv)" + ` values, which allow binary messages to be encoded for logical separation. +Rather than separating data by a newline it's possible to specify alternative `+"[`codec_send`](#codec_send) and [`codec_recv`](#codec_recv)"+` values, which allow binary messages to be encoded for logical separation. The execution environment of the subprocess is the same as the Benthos instance, including environment variables and the current working directory. -The field ` + "`max_buffer`" + ` defines the maximum response size able to be read from the subprocess. This value should be set significantly above the real expected maximum response size. +The field `+"`max_buffer`"+` defines the maximum response size able to be read from the subprocess. This value should be set significantly above the real expected maximum response size. 
## Subprocess requirements @@ -56,19 +64,60 @@ It is required that subprocesses flush their stdout and stderr pipes for each li ## Messages containing line breaks -If a message contains line breaks each line of the message is piped to the subprocess and flushed, and a response is expected from the subprocess before another line is fed in.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("name", "The command to execute as a subprocess.", "cat", "sed", "awk"), - docs.FieldString("args", "A list of arguments to provide the command.").Array(), - docs.FieldInt("max_buffer", "The maximum expected response size.").Advanced(), - docs.FieldString( - "codec_send", "Determines how messages written to the subprocess are encoded, which allows them to be logically separated.", - ).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0").Advanced(), - docs.FieldString( - "codec_recv", "Determines how messages read from the subprocess are decoded, which allows them to be logically separated.", - ).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewSubprocessConfig()), - }) +If a message contains line breaks each line of the message is piped to the subprocess and flushed, and a response is expected from the subprocess before another line is fed in.`). + Fields( + service.NewStringField(spFieldName). + Description("The command to execute as a subprocess."). + Examples("cat", "sed", "awk"), + service.NewStringListField(spFieldArgs). + Description("A list of arguments to provide the command."). + Default([]any{}), + service.NewIntField(spFieldMaxBuffer). + Description("The maximum expected response size."). + Advanced(). + Default(bufio.MaxScanTokenSize), + service.NewStringEnumField(spFieldCodecSend, "lines", "length_prefixed_uint32_be", "netstring"). 
+ Description("Determines how messages written to the subprocess are encoded, which allows them to be logically separated."). + Version("3.37.0"). + Advanced(). + Default("lines"), + service.NewStringEnumField(spFieldCodecRecv, "lines", "length_prefixed_uint32_be", "netstring"). + Description("Determines how messages read from the subprocess are decoded, which allows them to be logically separated."). + Version("3.37.0"). + Advanced(). + Default("lines"), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "subprocess", subProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + var sConf subprocConfig + var err error + if sConf.Name, err = conf.FieldString(spFieldName); err != nil { + return nil, err + } + if sConf.Args, err = conf.FieldStringList(spFieldArgs); err != nil { + return nil, err + } + if sConf.MaxBuffer, err = conf.FieldInt(spFieldMaxBuffer); err != nil { + return nil, err + } + if sConf.CodecSend, err = conf.FieldString(spFieldCodecSend); err != nil { + return nil, err + } + if sConf.CodecRecv, err = conf.FieldString(spFieldCodecRecv); err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newSubprocess(sConf, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("subprocess", p, mgr)), nil + }) if err != nil { panic(err) } @@ -82,7 +131,7 @@ type subprocessProc struct { mut sync.Mutex } -func newSubprocess(conf processor.SubprocessConfig, mgr bundle.NewManagement) (*subprocessProc, error) { +func newSubprocess(conf subprocConfig, mgr bundle.NewManagement) (*subprocessProc, error) { e := &subprocessProc{ log: mgr.Logger(), } diff --git a/internal/impl/io/processor_subprocess_test.go b/internal/impl/io/processor_subprocess_test.go index 19a93c3789..6e494e600d 100644 --- a/internal/impl/io/processor_subprocess_test.go +++ b/internal/impl/io/processor_subprocess_test.go @@ -2,12 
+2,14 @@ package io_test import ( "context" + "fmt" "os" "path" "reflect" "testing" "time" + "github.com/Jeffail/gabs/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -19,10 +21,12 @@ import ( func TestSubprocessWithSed(t *testing.T) { t.Skip("disabled for now") - conf := processor.NewConfig() - conf.Type = "subprocess" - conf.Subprocess.Name = "sed" - conf.Subprocess.Args = []string{"s/foo/bar/g", "-u"} + conf, err := processor.FromYAML(` +subprocess: + name: sed + args: [ "s/foo/bar/g", "-u" ] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -59,9 +63,11 @@ func TestSubprocessWithSed(t *testing.T) { func TestSubprocessWithCat(t *testing.T) { t.Skip("disabled for now") - conf := processor.NewConfig() - conf.Type = "subprocess" - conf.Subprocess.Name = "cat" + conf, err := processor.FromYAML(` +subprocess: + name: cat +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -98,10 +104,12 @@ func TestSubprocessWithCat(t *testing.T) { func TestSubprocessLineBreaks(t *testing.T) { t.Skip("disabled for now") - conf := processor.NewConfig() - conf.Type = "subprocess" - conf.Subprocess.Name = "sed" - conf.Subprocess.Args = []string{`s/\(^$\)\|\(foo\)/bar/`, "-u"} + conf, err := processor.FromYAML(` +subprocess: + name: sed + args: [ "s/\\(^$\\)\\|\\(foo\\)/bar/", "-u" ] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -140,10 +148,12 @@ func TestSubprocessLineBreaks(t *testing.T) { } func TestSubprocessWithErrors(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "subprocess" - conf.Subprocess.Name = "sh" - conf.Subprocess.Args = []string{"-c", "cat 1>&2"} + conf, err := processor.FromYAML(` +subprocess: + name: sh + args: [ "-c", "cat 1>&2" ] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -242,12 +252,14 @@ func main() { } `) f := 
func(formatSend, formatRecv string, extra bool) { - conf := processor.NewConfig() - conf.Type = "subprocess" - conf.Subprocess.Name = "go" - conf.Subprocess.Args = []string{"run", filePath, "-stdinCodec", formatSend, "-stdoutCodec", formatRecv} - conf.Subprocess.CodecSend = formatSend - conf.Subprocess.CodecRecv = formatRecv + conf, err := processor.FromYAML(fmt.Sprintf(` +subprocess: + name: go + args: %v + codec_send: %v + codec_recv: %v +`, gabs.Wrap([]string{"run", filePath, "-stdinCodec", formatSend, "-stdoutCodec", formatRecv}).String(), formatSend, formatRecv)) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) diff --git a/internal/impl/pure/algorithms.go b/internal/impl/pure/algorithms.go index 269b93ae9d..af52b0c050 100644 --- a/internal/impl/pure/algorithms.go +++ b/internal/impl/pure/algorithms.go @@ -5,6 +5,7 @@ import ( "compress/bzip2" "fmt" "io" + "sort" "sync" "github.com/klauspost/compress/flate" @@ -71,6 +72,32 @@ func AddKnownCompressionAlgorithm(name string, a KnownCompressionAlgorithm) stru return struct{}{} } +func CompressionAlgsList() (v []string) { + knownCompressionAlgorithmsLock.Lock() + v = make([]string, 0, len(knownCompressionAlgorithms)) + for k, a := range knownCompressionAlgorithms { + if a.CompressFunc != nil { + v = append(v, k) + } + } + knownCompressionAlgorithmsLock.Unlock() + sort.Strings(v) + return v +} + +func DecompressionAlgsList() (v []string) { + knownCompressionAlgorithmsLock.Lock() + v = make([]string, 0, len(knownCompressionAlgorithms)) + for k, a := range knownCompressionAlgorithms { + if a.DecompressFunc != nil { + v = append(v, k) + } + } + knownCompressionAlgorithmsLock.Unlock() + sort.Strings(v) + return v +} + func strToCompressAlg(str string) (KnownCompressionAlgorithm, error) { fn, exists := knownCompressionAlgorithms[str] if !exists { diff --git a/internal/impl/pure/input_sequence_test.go b/internal/impl/pure/input_sequence_test.go index 
fe748e0aa2..a4543ec700 100644 --- a/internal/impl/pure/input_sequence_test.go +++ b/internal/impl/pure/input_sequence_test.go @@ -11,7 +11,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/component/input" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -386,8 +385,7 @@ func TestSequenceSad(t *testing.T) { writeFiles(t, tmpDir, files) - conf := input.NewConfig() - require.NoError(t, yaml.Unmarshal(fmt.Appendf(nil, ` + conf, err := input.FromYAML(fmt.Sprintf(` sequence: inputs: - file: @@ -399,7 +397,8 @@ sequence: - file: paths: - "%v/f3" -`, tmpDir, tmpDir, tmpDir), &conf)) +`, tmpDir, tmpDir, tmpDir)) + require.NoError(t, err) rdr, err := mock.NewManager().NewInput(conf) require.NoError(t, err) @@ -464,14 +463,14 @@ func TestSequenceEarlyTermination(t *testing.T) { "f1": "foo\nbar\nbaz", }) - conf := input.NewConfig() - require.NoError(t, yaml.Unmarshal(fmt.Appendf(nil, ` + conf, err := input.FromYAML(fmt.Sprintf(` sequence: inputs: - file: paths: - "%v/f1" -`, tmpDir), &conf)) +`, tmpDir)) + require.NoError(t, err) rdr, err := mock.NewManager().NewInput(conf) require.NoError(t, err) diff --git a/internal/impl/pure/processor_bloblang.go b/internal/impl/pure/processor_bloblang.go index 314f950129..9a2691fb5b 100644 --- a/internal/impl/pure/processor_bloblang.go +++ b/internal/impl/pure/processor_bloblang.go @@ -7,53 +7,37 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bloblang/parser" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - 
err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newBloblang(conf.Bloblang, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("bloblang", p, mgr), nil - }, docs.ComponentSpec{ - Name: "bloblang", - Categories: []string{ - "Mapping", - "Parsing", - }, - Config: docs.FieldString("", "").IsBloblang().HasDefault(""), - Summary: ` -Executes a [Bloblang](/docs/guides/bloblang/about) mapping on messages.`, - Description: ` + err := service.RegisterBatchProcessor("bloblang", service.NewConfigSpec(). + Stable(). + Categories("Mapping", "Parsing"). + Summary("Executes a [Bloblang](/docs/guides/bloblang/about) mapping on messages."). + Description(` Bloblang is a powerful language that enables a wide range of mapping, transformation and filtering tasks. For more information [check out the docs](/docs/guides/bloblang/about). -If your mapping is large and you'd prefer for it to live in a separate file then you can execute a mapping directly from a file with the expression ` + "`from \"\"`" + `, where the path must be absolute, or relative from the location that Benthos is executed from. +If your mapping is large and you'd prefer for it to live in a separate file then you can execute a mapping directly from a file with the expression `+"`from \"\"`"+`, where the path must be absolute, or relative from the location that Benthos is executed from. ## Component Rename -This processor was recently renamed to the ` + "[`mapping` processor](/docs/components/processors/mapping)" + ` in order to make the purpose of the processor more prominent. 
It is still valid to use the existing ` + "`bloblang`" + ` name but eventually it will be deprecated and replaced by the new name in example configs.`, - Footnotes: ` +This processor was recently renamed to the `+"[`mapping` processor](/docs/components/processors/mapping)"+` in order to make the purpose of the processor more prominent. It is still valid to use the existing `+"`bloblang`"+` name but eventually it will be deprecated and replaced by the new name in example configs.`). + Footnotes(` ## Error Handling -Bloblang mappings can fail, in which case the message remains unchanged, errors -are logged, and the message is flagged as having failed, allowing you to use +Bloblang mappings can fail, in which case the message remains unchanged, errors are logged, and the message is flagged as having failed, allowing you to use [standard processor error handling patterns](/docs/configuration/error_handling). -However, Bloblang itself also provides powerful ways of ensuring your mappings -do not fail by specifying desired fallback behaviour, which you can read about -[in this section](/docs/guides/bloblang/about#error-handling).`, - Examples: []docs.AnnotatedExample{ - { - Title: "Mapping", - Summary: ` +However, Bloblang itself also provides powerful ways of ensuring your mappings do not fail by specifying desired fallback behaviour, which you can read about [in this section](/docs/guides/bloblang/about#error-handling).`). 
+ Example("Mapping", ` Given JSON documents containing an array of fans: -` + "```json" + ` +`+"```json"+` { "id":"foo", "description":"a show about foo", @@ -64,11 +48,11 @@ Given JSON documents containing an array of fans: {"name":"vic","obsession":0.43} ] } -` + "```" + ` +`+"```"+` We can reduce the fans to only those with an obsession score above 0.5, giving us: -` + "```json" + ` +`+"```json"+` { "id":"foo", "description":"a show about foo", @@ -77,23 +61,21 @@ We can reduce the fans to only those with an obsession score above 0.5, giving u {"name":"ali","obsession":0.89} ] } -` + "```" + ` +`+"```"+` With the following config:`, - Config: ` + ` pipeline: processors: - bloblang: | root = this root.fans = this.fans.filter(fan -> fan.obsession > 0.5) `, - }, - { - Title: "More Mapping", - Summary: ` + ). + Example("More Mapping", ` When receiving JSON documents of the form: -` + "```json" + ` +`+"```json"+` { "locations": [ {"name": "Seattle", "state": "WA"}, @@ -102,16 +84,16 @@ When receiving JSON documents of the form: {"name": "Olympia", "state": "WA"} ] } -` + "```" + ` +`+"```"+` -We could collapse the location names from the state of Washington into a field ` + "`Cities`" + `: +We could collapse the location names from the state of Washington into a field `+"`Cities`"+`: -` + "```json" + ` +`+"```json"+` {"Cities": "Bellevue, Olympia, Seattle"} -` + "```" + ` +`+"```"+` With the following config:`, - Config: ` + ` pipeline: processors: - bloblang: | @@ -119,10 +101,22 @@ pipeline: filter(loc -> loc.state == "WA"). map_each(loc -> loc.name). sort().join(", ") -`, - }, - }, - }) +`). 
+ Field(service.NewBloblangField("").Default("")), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + m, err := conf.FieldString() + if err != nil { + return nil, err + } + mgr := interop.UnwrapManagement(res) + p, err := newBloblang(m, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor( + processor.NewAutoObservedBatchedProcessor("bloblang", p, mgr), + ), nil + }) if err != nil { panic(err) } diff --git a/internal/impl/pure/processor_bloblang_test.go b/internal/impl/pure/processor_bloblang_test.go index 13956f8062..45eab437bb 100644 --- a/internal/impl/pure/processor_bloblang_test.go +++ b/internal/impl/pure/processor_bloblang_test.go @@ -31,7 +31,7 @@ func TestBloblangCrossfire(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = ` + conf.Plugin = ` foo = json("foo").from(0) foo.bar_new = "this is swapped now" foo.bar.baz = "and this changed" @@ -90,7 +90,7 @@ func TestBloblangContext(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = `result = foo.bar.baz.uppercase()` + conf.Plugin = `result = foo.bar.baz.uppercase()` proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Fatal(err) @@ -127,7 +127,7 @@ func TestBloblangCustomObject(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = `root.foos = this.foos` + conf.Plugin = `root.foos = this.foos` proc, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -150,7 +150,7 @@ func TestBloblangFiltering(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = ` + conf.Plugin = ` root = match { (foo | bar).delete.or(false) => deleted(), } @@ -180,7 +180,7 @@ func TestBloblangFilterAll(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = `root = deleted()` + conf.Plugin = `root = deleted()` proc, err := mock.NewManager().NewProcessor(conf) 
if err != nil { t.Fatal(err) @@ -198,7 +198,7 @@ func TestBloblangJSONError(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = ` + conf.Plugin = ` foo = json().bar ` proc, err := mock.NewManager().NewProcessor(conf) diff --git a/internal/impl/pure/processor_bounds_check.go b/internal/impl/pure/processor_bounds_check.go index 6aeba16930..1037ac230c 100644 --- a/internal/impl/pure/processor_bounds_check.go +++ b/internal/impl/pure/processor_bounds_check.go @@ -5,75 +5,123 @@ import ( "errors" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) +const ( + bcpFieldMaxParts = "max_parts" + bcpFieldMinParts = "min_parts" + bcpFieldMaxPartSize = "max_part_size" + bcpFieldMinPartSize = "min_part_size" +) + +func bcProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary("Removes messages (and batches) that do not fit within certain size boundaries."). + Fields( + service.NewIntField(bcpFieldMaxPartSize). + Description("The maximum size of a message to allow (in bytes)"). + Default(1*1024*1024*1024), + service.NewIntField(bcpFieldMinPartSize). + Description("The minimum size of a message to allow (in bytes)"). + Default(1), + service.NewIntField(bcpFieldMaxParts). + Description("The maximum size of message batches to allow (in message count)"). + Advanced(). + Default(100), + service.NewIntField(bcpFieldMinParts). + Description("The minimum size of message batches to allow (in message count)"). + Advanced(). 
+ Default(1), + ) +} + func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newBoundsCheck(conf.BoundsCheck, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("bounds_check", p, mgr), nil - }, docs.ComponentSpec{ - Name: "bounds_check", - Categories: []string{ - "Utility", - }, - Summary: ` -Removes messages (and batches) that do not fit within certain size boundaries.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldInt("max_part_size", "The maximum size of a message to allow (in bytes)"), - docs.FieldInt("min_part_size", "The minimum size of a message to allow (in bytes)"), - docs.FieldInt("max_parts", "The maximum size of message batches to allow (in message count)").Advanced(), - docs.FieldInt("min_parts", "The minimum size of message batches to allow (in message count)").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewBoundsCheckConfig()), - }) + err := service.RegisterBatchProcessor( + "bounds_check", bcProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + maxParts, err := conf.FieldInt(bcpFieldMaxParts) + if err != nil { + return nil, err + } + + minParts, err := conf.FieldInt(bcpFieldMinParts) + if err != nil { + return nil, err + } + + maxPartSize, err := conf.FieldInt(bcpFieldMaxPartSize) + if err != nil { + return nil, err + } + + minPartSize, err := conf.FieldInt(bcpFieldMinPartSize) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newBoundsCheck(maxParts, minParts, maxPartSize, minPartSize, mgr) + if err != nil { + return nil, err + } + + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("bounds_check", p, mgr)), nil + }) if err != nil { panic(err) } } type boundsCheck struct { - conf processor.BoundsCheckConfig - log log.Modular + maxParts int + minParts int + 
maxPartSize int + minPartSize int + log log.Modular } // newBoundsCheck returns a BoundsCheck processor. -func newBoundsCheck(conf processor.BoundsCheckConfig, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { +func newBoundsCheck(maxParts, minParts, maxPartSize, minPartSize int, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { return &boundsCheck{ - conf: conf, - log: mgr.Logger(), + maxParts: maxParts, + minParts: minParts, + maxPartSize: maxPartSize, + minPartSize: minPartSize, + log: mgr.Logger(), }, nil } func (m *boundsCheck) ProcessBatch(ctx *processor.BatchProcContext, msg message.Batch) ([]message.Batch, error) { lParts := msg.Len() - if lParts < m.conf.MinParts { + if lParts < m.minParts { m.log.Debugf( "Rejecting message due to message parts below minimum (%v): %v\n", - m.conf.MinParts, lParts, + m.minParts, lParts, ) return nil, nil - } else if lParts > m.conf.MaxParts { + } else if lParts > m.maxParts { m.log.Debugf( "Rejecting message due to message parts exceeding limit (%v): %v\n", - m.conf.MaxParts, lParts, + m.maxParts, lParts, ) return nil, nil } var reject bool _ = msg.Iter(func(i int, p *message.Part) error { - if size := len(p.AsBytes()); size > m.conf.MaxPartSize || - size < m.conf.MinPartSize { + if size := len(p.AsBytes()); size > m.maxPartSize || + size < m.minPartSize { m.log.Debugf( "Rejecting message due to message part size (%v -> %v): %v\n", - m.conf.MinPartSize, - m.conf.MaxPartSize, + m.minPartSize, + m.maxPartSize, size, ) reject = true diff --git a/internal/impl/pure/processor_bounds_check_test.go b/internal/impl/pure/processor_bounds_check_test.go index 02b66eef6b..adf5586d94 100644 --- a/internal/impl/pure/processor_bounds_check_test.go +++ b/internal/impl/pure/processor_bounds_check_test.go @@ -13,12 +13,14 @@ import ( ) func TestBoundsCheck(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "bounds_check" - conf.BoundsCheck.MinParts = 2 - conf.BoundsCheck.MaxParts = 3 - 
conf.BoundsCheck.MaxPartSize = 10 - conf.BoundsCheck.MinPartSize = 1 + conf, err := processor.FromYAML(` +bounds_check: + min_parts: 2 + max_parts: 3 + max_part_size: 10 + min_part_size: 1 +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_branch.go b/internal/impl/pure/processor_branch.go index 50b1a8aa33..0d6a81cba7 100644 --- a/internal/impl/pure/processor_branch.go +++ b/internal/impl/pure/processor_branch.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "sort" - "strconv" "time" "go.opentelemetry.io/otel/trace" @@ -13,97 +12,42 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bloblang/query" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/metrics" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" "github.com/benthosdev/benthos/v4/internal/tracing" + "github.com/benthosdev/benthos/v4/public/service" ) -var branchFields = docs.FieldSpecs{ - docs.FieldBloblang( - "request_map", - "A [Bloblang mapping](/docs/guides/bloblang/about) that describes how to create a request payload suitable for the child processors of this branch. If left empty then the branch will begin with an exact copy of the origin message (including metadata).", - `root = { - "id": this.doc.id, - "content": this.doc.body.text -}`, - `root = if this.type == "foo" { - this.foo.request -} else { - deleted() -}`, - ).HasDefault(""), - docs.FieldProcessor( - "processors", - "A list of processors to apply to mapped requests. 
When processing message batches the resulting batch must match the size and ordering of the input batch, therefore filtering, grouping should not be performed within these processors.", - ).Array().HasDefault([]any{}), - docs.FieldBloblang( - "result_map", - "A [Bloblang mapping](/docs/guides/bloblang/about) that describes how the resulting messages from branched processing should be mapped back into the original payload. If left empty the origin message will remain unchanged (including metadata).", - `meta foo_code = meta("code") -root.foo_result = this`, - `meta = meta() -root.bar.body = this.body -root.bar.id = this.user.id`, - `root.raw_result = content().string()`, - `root.enrichments.foo = if meta("request_failed") != null { - throw(meta("request_failed")) -} else { - this -}`, - ).HasDefault(""), -} +const ( + branchProcFieldReqMap = "request_map" + branchProcFieldProcs = "processors" + branchProcFieldResMap = "result_map" +) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - return newBranch(conf.Branch, mgr) - }, docs.ComponentSpec{ - Name: "branch", - Status: docs.StatusStable, - Categories: []string{ - "Composition", - }, - Summary: ` -The ` + "`branch`" + ` processor allows you to create a new request message via -a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors -on the request messages, and, finally, map the result back into the source -message using another mapping.`, - Description: ` -This is useful for preserving the original message contents when using -processors that would otherwise replace the entire contents. +func branchProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). 
+ Summary(`The `+"`branch`"+` processor allows you to create a new request message via a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors on the request messages, and, finally, map the result back into the source message using another mapping.`). + Description(` +This is useful for preserving the original message contents when using processors that would otherwise replace the entire contents. ### Metadata -Metadata fields that are added to messages during branch processing will not be -automatically copied into the resulting message. In order to do this you should -explicitly declare in your ` + "`result_map`" + ` either a wholesale copy with -` + "`meta = meta()`" + `, or selective copies with -` + "`meta foo = meta(\"bar\")`" + ` and so on. +Metadata fields that are added to messages during branch processing will not be automatically copied into the resulting message. In order to do this you should explicitly declare in your `+"`result_map`"+` either a wholesale copy with `+"`meta = meta()`"+`, or selective copies with `+"`meta foo = meta(\"bar\")`"+` and so on. ### Error Handling -If the ` + "`request_map`" + ` fails the child processors will not be executed. -If the child processors themselves result in an (uncaught) error then the -` + "`result_map`" + ` will not be executed. If the ` + "`result_map`" + ` fails -the message will remain unchanged. Under any of these conditions standard -[error handling methods](/docs/configuration/error_handling) can be used in -order to filter, DLQ or recover the failed messages. +If the `+"`request_map`"+` fails the child processors will not be executed. If the child processors themselves result in an (uncaught) error then the `+"`result_map`"+` will not be executed. If the `+"`result_map`"+` fails the message will remain unchanged. Under any of these conditions standard [error handling methods](/docs/configuration/error_handling) can be used in order to filter, DLQ or recover the failed messages. 
### Conditional Branching -If the root of your request map is set to ` + "`deleted()`" + ` then the branch -processors are skipped for the given message, this allows you to conditionally -branch messages.`, - Examples: []docs.AnnotatedExample{ - { - Title: "HTTP Request", - Summary: ` -This example strips the request message into an empty body, grabs an HTTP -payload, and places the result back into the original message at the path -` + "`image.pull_count`" + `:`, - Config: ` +If the root of your request map is set to `+"`deleted()`"+` then the branch processors are skipped for the given message, this allows you to conditionally branch messages.`). + Example("HTTP Request", ` +This example strips the request message into an empty body, grabs an HTTP payload, and places the result back into the original message at the path `+"`image.pull_count`"+`:`, ` pipeline: processors: - branch: @@ -116,13 +60,9 @@ pipeline: # Example input: {"id":"foo","some":"pre-existing data"} # Example output: {"id":"foo","some":"pre-existing data","image":{"pull_count":1234}} -`, - }, - { - Title: "Non Structured Results", - Summary: ` -When the result of your branch processors is unstructured and you wish to simply set a resulting field to the raw output use the content function to obtain the raw bytes of the resulting message and then coerce it into your value type of choice:`, - Config: ` +`). 
+ Example("Non Structured Results", ` +When the result of your branch processors is unstructured and you wish to simply set a resulting field to the raw output use the content function to obtain the raw bytes of the resulting message and then coerce it into your value type of choice:`, ` pipeline: processors: - branch: @@ -136,15 +76,9 @@ pipeline: # Example input: {"document":{"id":"foo","content":"hello world"}} # Example output: {"document":{"id":"foo","content":"hello world","description":"this is a cool doc"}} -`, - }, - { - Title: "Lambda Function", - Summary: ` -This example maps a new payload for triggering a lambda function with an ID and -username from the original message, and the result of the lambda is discarded, -meaning the original message is unchanged.`, - Config: ` +`). + Example("Lambda Function", ` +This example maps a new payload for triggering a lambda function with an ID and username from the original message, and the result of the lambda is discarded, meaning the original message is unchanged.`, ` pipeline: processors: - branch: @@ -155,14 +89,9 @@ pipeline: # Example input: {"doc":{"id":"foo","body":"hello world"},"user":{"name":"fooey"}} # Output matches the input, which is unchanged -`, - }, - { - Title: "Conditional Caching", - Summary: ` -This example caches a document by a message ID only when the type of the -document is a foo:`, - Config: ` +`). + Example("Conditional Caching", ` +This example caches a document by a message ID only when the type of the document is a foo:`, ` pipeline: processors: - branch: @@ -179,11 +108,53 @@ pipeline: operator: set key: ${! meta("id") } value: ${! content() } -`, - }, - }, - Config: docs.FieldComponent().WithChildren(branchFields...), - }) +`). + Fields(branchSpecFields()...) +} + +func branchSpecFields() []*service.ConfigField { + return []*service.ConfigField{ + service.NewBloblangField(branchProcFieldReqMap). 
+ Description("A [Bloblang mapping](/docs/guides/bloblang/about) that describes how to create a request payload suitable for the child processors of this branch. If left empty then the branch will begin with an exact copy of the origin message (including metadata)."). + Examples(`root = { + "id": this.doc.id, + "content": this.doc.body.text +}`, + `root = if this.type == "foo" { + this.foo.request +} else { + deleted() +}`). + Default(""), + service.NewProcessorListField(branchProcFieldProcs). + Description("A list of processors to apply to mapped requests. When processing message batches the resulting batch must match the size and ordering of the input batch, therefore filtering, grouping should not be performed within these processors."), + service.NewBloblangField(branchProcFieldResMap). + Description("A [Bloblang mapping](/docs/guides/bloblang/about) that describes how the resulting messages from branched processing should be mapped back into the original payload. If left empty the origin message will remain unchanged (including metadata)."). + Examples(`meta foo_code = meta("code") +root.foo_result = this`, + `meta = meta() +root.bar.body = this.body +root.bar.id = this.user.id`, + `root.raw_result = content().string()`, + `root.enrichments.foo = if meta("request_failed") != null { + throw(meta("request_failed")) +} else { + this +}`). 
+ Default(""), + } +} + +func init() { + err := service.RegisterBatchProcessor( + "branch", branchProcSpec(), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + b, err := newBranchFromParsed(conf, interop.UnwrapManagement(mgr)) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(b), nil + }) if err != nil { panic(err) } @@ -209,25 +180,11 @@ type Branch struct { mLatency metrics.StatTimer } -func newBranch(conf processor.BranchConfig, mgr bundle.NewManagement) (*Branch, error) { - children := make([]processor.V1, 0, len(conf.Processors)) - for i, pconf := range conf.Processors { - pMgr := mgr.IntoPath("branch", "processors", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, fmt.Errorf("failed to init processor %v: %w", i, err) - } - children = append(children, proc) - } - if len(children) == 0 { - return nil, errors.New("the branch processor requires at least one child processor") - } - +func newBranchFromParsed(conf *service.ParsedConfig, mgr bundle.NewManagement) (b *Branch, err error) { stats := mgr.Metrics() - b := &Branch{ - children: children, - log: mgr.Logger(), - tracer: mgr.Tracer(), + b = &Branch{ + log: mgr.Logger(), + tracer: mgr.Tracer(), mReceived: stats.GetCounter("processor_received"), mBatchReceived: stats.GetCounter("processor_batch_received"), @@ -237,14 +194,25 @@ func newBranch(conf processor.BranchConfig, mgr bundle.NewManagement) (*Branch, mLatency: stats.GetTimer("processor_latency_ns"), } - var err error - if len(conf.RequestMap) > 0 { - if b.requestMap, err = mgr.BloblEnvironment().NewMapping(conf.RequestMap); err != nil { + var pChildren []*service.OwnedProcessor + if pChildren, err = conf.FieldProcessorList(branchProcFieldProcs); err != nil { + return + } + if len(pChildren) == 0 { + return nil, errors.New("the branch processor requires at least one child processor") + } + b.children = make([]processor.V1, 
len(pChildren)) + for i, c := range pChildren { + b.children[i] = interop.UnwrapOwnedProcessor(c) + } + + if reqMapStr, _ := conf.FieldString(branchProcFieldReqMap); len(reqMapStr) > 0 { + if b.requestMap, err = mgr.BloblEnvironment().NewMapping(reqMapStr); err != nil { return nil, fmt.Errorf("failed to parse request mapping: %w", err) } } - if len(conf.ResultMap) > 0 { - if b.resultMap, err = mgr.BloblEnvironment().NewMapping(conf.ResultMap); err != nil { + if resMapStr, _ := conf.FieldString(branchProcFieldResMap); len(resMapStr) > 0 { + if b.resultMap, err = mgr.BloblEnvironment().NewMapping(resMapStr); err != nil { return nil, fmt.Errorf("failed to parse result mapping: %w", err) } } diff --git a/internal/impl/pure/processor_branch_test.go b/internal/impl/pure/processor_branch_test.go index 4d848e7241..c0ea0c3e34 100644 --- a/internal/impl/pure/processor_branch_test.go +++ b/internal/impl/pure/processor_branch_test.go @@ -3,6 +3,7 @@ package pure_test import ( "context" "errors" + "fmt" "testing" "time" @@ -81,12 +82,10 @@ func TestBranchBasic(t *testing.T) { "do not carry error into branch": { requestMap: `root = this`, processorMap: `root = this - root.name_upper = this.name.uppercase()`, + root.name_upper = this.name.uppercase()`, resultMap: `root.result = if this.failme.bool(false) { - throw("this is a branch error") - } else { - this.name_upper - }`, + throw("this is a branch error") } else { this.name_upper + }`, input: []mockMsg{ msg(`{"id":0,"name":"first"}`).withErr(errors.New("this is a pre-existing failure")), msg(`{"failme":true,"id":1,"name":"second"}`), @@ -113,16 +112,16 @@ func TestBranchBasic(t *testing.T) { }, "filtered and failed mappings": { requestMap: `root = match { - this.id == 0 => throw("i dont like zero"), - this.id == 3 => deleted(), - _ => {"name":this.name,"id":this.id} - }`, + this.id == 0 => throw("i dont like zero"), + this.id == 3 => deleted(), + _ => {"name":this.name,"id":this.id} + }`, processorMap: `root = this - 
root.name_upper = this.name.uppercase()`, + root.name_upper = this.name.uppercase()`, resultMap: `root.result = match { - this.id == 2 => throw("i dont like two either"), - _ => this.name_upper - }`, + this.id == 2 => throw("i dont like two either"), + _ => this.name_upper + }`, input: []mockMsg{ msg(`{"id":0,"name":"first"}`), msg(`{"id":1,"name":"second"}`), @@ -202,15 +201,17 @@ func TestBranchBasic(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - procConf := processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = test.processorMap - - conf := processor.NewConfig() - conf.Type = "branch" - conf.Branch.RequestMap = test.requestMap - conf.Branch.Processors = append(conf.Branch.Processors, procConf) - conf.Branch.ResultMap = test.resultMap + conf, err := processor.FromYAML(fmt.Sprintf(` +branch: + request_map: | + %v + processors: + - bloblang: | + %v + result_map: | + %v +`, test.requestMap, test.processorMap, test.resultMap)) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) diff --git a/internal/impl/pure/processor_cache.go b/internal/impl/pure/processor_cache.go index f65d755bfd..609836840b 100644 --- a/internal/impl/pure/processor_cache.go +++ b/internal/impl/pure/processor_cache.go @@ -10,48 +10,56 @@ import ( "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component" "github.com/benthosdev/benthos/v4/internal/component/cache" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newCache(conf.Cache, mgr) - if err != nil { - return nil, err - } - return 
processor.NewAutoObservedBatchedProcessor("cache", p, mgr), nil - }, docs.ComponentSpec{ - Name: "cache", - Categories: []string{ - "Integration", - }, - Summary: ` -Performs operations against a [cache resource](/docs/components/caches/about) for each message, allowing you to store or retrieve data within message payloads.`, - Description: ` -For use cases where you wish to cache the result of processors consider using the ` + "[`cached` processor](/docs/components/processors/cached)" + ` instead. - -This processor will interpolate functions within the ` + "`key` and `value`" + ` fields individually for each message. This allows you to specify dynamic keys and values based on the contents of the message payloads and metadata. You can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("resource", "The [`cache` resource](/docs/components/caches/about) to target with this processor."), - docs.FieldString("operator", "The [operation](#operators) to perform with the cache.").HasOptions("set", "add", "get", "delete"), - docs.FieldString("key", "A key to use with the cache.").IsInterpolated(), - docs.FieldString("value", "A value to use with the cache (when applicable).").IsInterpolated(), - docs.FieldString( - "ttl", "The TTL of each individual item as a duration string. After this period an item will be eligible for removal during the next compaction. 
Not all caches support per-key TTLs, those that do will have a configuration field `default_ttl`, and those that do not will fall back to their generally configured TTL setting.", - "60s", "5m", "36h", - ).IsInterpolated().AtVersion("3.33.0").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewCacheConfig()), - Examples: []docs.AnnotatedExample{ - { - Title: "Deduplication", - Summary: ` -Deduplication can be done using the add operator with a key extracted from the -message payload, since it fails when a key already exists we can remove the -duplicates using a -[` + "`mapping` processor" + `](/docs/components/processors/mapping):`, - Config: ` +const ( + cachePFieldResource = "resource" + cachePFieldOperator = "operator" + cachePFieldKey = "key" + cachePFieldValue = "value" + cachePFieldTTL = "ttl" +) + +func cacheProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Integration"). + Stable(). + Summary("Performs operations against a [cache resource](/docs/components/caches/about) for each message, allowing you to store or retrieve data within message payloads."). + Description(` +For use cases where you wish to cache the result of processors consider using the `+"[`cached` processor](/docs/components/processors/cached)"+` instead. + +This processor will interpolate functions within the `+"`key` and `value`"+` fields individually for each message. This allows you to specify dynamic keys and values based on the contents of the message payloads and metadata. You can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`). + Footnotes(` +## Operators + +### `+"`set`"+` + +Set a key in the cache to a value. If the key already exists the contents are +overridden. + +### `+"`add`"+` + +Set a key in the cache to a value. If the key already exists the action fails +with a 'key already exists' error, which can be detected with +[processor error handling](/docs/configuration/error_handling). 
+ +### `+"`get`"+` + +Retrieve the contents of a cached key and replace the original message payload +with the result. If the key does not exist the action fails with an error, which +can be detected with [processor error handling](/docs/configuration/error_handling). + +### `+"`delete`"+` + +Delete a key and its contents from the cache. If the key does not exist the +action is a no-op and will not fail with an error.`). + Example("Deduplication", ` +Deduplication can be done using the add operator with a key extracted from the message payload, since it fails when a key already exists we can remove the duplicates using a [`+"`mapping` processor"+`](/docs/components/processors/mapping):`, + ` pipeline: processors: - cache: @@ -65,13 +73,10 @@ cache_resources: - label: foocache redis: url: tcp://TODO:6379 -`, - }, - { - Title: "Deduplication Batch-Wide", - Summary: ` -Sometimes it's necessary to deduplicate a batch of messages (AKA a window) by a single identifying value. This can be done by introducing a ` + "[`branch` processor](/docs/components/processors/branch)" + `, which executes the cache only once on behalf of the batch, in this case with a value make from a field extracted from the first and last messages of the batch:`, - Config: ` +`). + Example("Deduplication Batch-Wide", ` +Sometimes it's necessary to deduplicate a batch of messages (AKA a window) by a single identifying value. This can be done by introducing a `+"[`branch` processor](/docs/components/processors/branch)"+`, which executes the cache only once on behalf of the batch, in this case with a value made from a field extracted from the first and last messages of the batch:`, + ` pipeline: processors: # Try and add one message to a cache that identifies the whole batch @@ -82,6 +87,7 @@ pipeline: } else { deleted() } processors: - cache: + resource: foocache operator: add key: ${!
content() } value: t @@ -90,14 +96,10 @@ pipeline: root = if errored().from(0) { deleted() } -`, - }, - { - Title: "Hydration", - Summary: ` -It's possible to enrich payloads with content previously stored in a cache by -using the [` + "`branch`" + `](/docs/components/processors/branch) processor:`, - Config: ` +`). + Example("Hydration", ` +It's possible to enrich payloads with content previously stored in a cache by using the [`+"`branch`"+`](/docs/components/processors/branch) processor:`, + ` pipeline: processors: - branch: @@ -116,34 +118,60 @@ cache_resources: - label: foocache memcached: addresses: [ "TODO:11211" ] -`, - }, - }, - Footnotes: ` -## Operators - -### ` + "`set`" + ` - -Set a key in the cache to a value. If the key already exists the contents are -overridden. - -### ` + "`add`" + ` - -Set a key in the cache to a value. If the key already exists the action fails -with a 'key already exists' error, which can be detected with -[processor error handling](/docs/configuration/error_handling). - -### ` + "`get`" + ` +`). + Fields( + service.NewStringField(cachePFieldResource). + Description("The [`cache` resource](/docs/components/caches/about) to target with this processor."), + service.NewStringEnumField(cachePFieldOperator, "set", "add", "get", "delete"). + Description("The [operation](#operators) to perform with the cache."), + service.NewInterpolatedStringField(cachePFieldKey). + Description("A key to use with the cache."), + service.NewInterpolatedStringField(cachePFieldValue). + Description("A value to use with the cache (when applicable)."). + Optional(), + service.NewInterpolatedStringField(cachePFieldTTL). + Description("The TTL of each individual item as a duration string. After this period an item will be eligible for removal during the next compaction. Not all caches support per-key TTLs, those that do will have a configuration field `default_ttl`, and those that do not will fall back to their generally configured TTL setting."). 
+ Examples("60s", "5m", "36h"). + Version("3.33.0"). + Advanced(). + Optional(), + ) +} -Retrieve the contents of a cached key and replace the original message payload -with the result. If the key does not exist the action fails with an error, which -can be detected with [processor error handling](/docs/configuration/error_handling). +type cacheProcConfig struct { + Resource string + Operator string + Key string + Value string + TTL string +} -### ` + "`delete`" + ` +func init() { + err := service.RegisterBatchProcessor( + "cache", cacheProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + var cConf cacheProcConfig + var err error + + if cConf.Resource, err = conf.FieldString(cachePFieldResource); err != nil { + return nil, err + } + if cConf.Operator, err = conf.FieldString(cachePFieldOperator); err != nil { + return nil, err + } + if cConf.Key, err = conf.FieldString(cachePFieldKey); err != nil { + return nil, err + } + cConf.Value, _ = conf.FieldString(cachePFieldValue) + cConf.TTL, _ = conf.FieldString(cachePFieldTTL) -Delete a key and its contents from the cache. 
If the key does not exist the -action is a no-op and will not fail with an error.`, - }) + mgr := interop.UnwrapManagement(res) + p, err := newCache(cConf, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("cache", p, mgr)), nil + }) if err != nil { panic(err) } @@ -161,7 +189,7 @@ type cacheProc struct { operator cacheOperator } -func newCache(conf processor.CacheConfig, mgr bundle.NewManagement) (*cacheProc, error) { +func newCache(conf cacheProcConfig, mgr bundle.NewManagement) (*cacheProc, error) { cacheName := conf.Resource if cacheName == "" { return nil, errors.New("cache name must be specified") diff --git a/internal/impl/pure/processor_cache_test.go b/internal/impl/pure/processor_cache_test.go index de190893e6..5d2360fd78 100644 --- a/internal/impl/pure/processor_cache_test.go +++ b/internal/impl/pure/processor_cache_test.go @@ -19,12 +19,15 @@ func TestCacheSet(t *testing.T) { mgr := mock.NewManager() mgr.Caches["foocache"] = map[string]mock.CacheItem{} - conf := processor.NewConfig() - conf.Type = "cache" - conf.Cache.Operator = "set" - conf.Cache.Key = "${!json(\"key\")}" - conf.Cache.Value = "${!json(\"value\")}" - conf.Cache.Resource = "foocache" + conf, err := processor.FromYAML(` +cache: + operator: set + key: ${!json("key")} + value: ${!json("value")} + resource: foocache +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) @@ -62,12 +65,15 @@ func TestCacheAdd(t *testing.T) { mgr := mock.NewManager() mgr.Caches["foocache"] = map[string]mock.CacheItem{} - conf := processor.NewConfig() - conf.Type = "cache" - conf.Cache.Key = "${!json(\"key\")}" - conf.Cache.Value = "${!json(\"value\")}" - conf.Cache.Resource = "foocache" - conf.Cache.Operator = "add" + conf, err := processor.FromYAML(` +cache: + key: ${!json("key")} + value: ${!json("value")} + resource: foocache + operator: add +`) + require.NoError(t, err) + proc, err 
:= mgr.NewProcessor(conf) if err != nil { t.Fatal(err) @@ -112,11 +118,14 @@ func TestCacheGet(t *testing.T) { "2": {Value: "foo 2"}, } - conf := processor.NewConfig() - conf.Type = "cache" - conf.Cache.Key = "${!json(\"key\")}" - conf.Cache.Resource = "foocache" - conf.Cache.Operator = "get" + conf, err := processor.FromYAML(` +cache: + operator: get + key: ${!json("key")} + resource: foocache +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) @@ -159,11 +168,14 @@ func TestCacheDelete(t *testing.T) { "3": {Value: "foo 3"}, } - conf := processor.NewConfig() - conf.Type = "cache" - conf.Cache.Key = "${!json(\"key\")}" - conf.Cache.Resource = "foocache" - conf.Cache.Operator = "delete" + conf, err := processor.FromYAML(` +cache: + operator: delete + key: ${!json("key")} + resource: foocache +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) diff --git a/internal/impl/pure/processor_catch.go b/internal/impl/pure/processor_catch.go index 1c94e1961b..cbf92dc4ef 100644 --- a/internal/impl/pure/processor_catch.go +++ b/internal/impl/pure/processor_catch.go @@ -2,81 +2,61 @@ package pure import ( "context" - "errors" - "strconv" - "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newCatch(conf.Catch, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("catch", p, mgr), nil - }, docs.ComponentSpec{ - Name: "catch", - Categories: []string{ - "Composition", - }, - Summary: ` -Applies a list of child processors _only_ when a 
previous processing step has -failed.`, - Description: ` -Behaves similarly to the ` + "[`for_each`](/docs/components/processors/for_each)" + ` processor, where a -list of child processors are applied to individual messages of a batch. However, -processors are only applied to messages that failed a processing step prior to -the catch. + err := service.RegisterBatchProcessor("catch", service.NewConfigSpec(). + Stable(). + Categories("Composition"). + Summary("Applies a list of child processors _only_ when a previous processing step has failed."). + Description(` +Behaves similarly to the `+"[`for_each`](/docs/components/processors/for_each)"+` processor, where a list of child processors are applied to individual messages of a batch. However, processors are only applied to messages that failed a processing step prior to the catch. For example, with the following config: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - resource: foo - catch: - resource: bar - resource: baz -` + "```" + ` - -If the processor ` + "`foo`" + ` fails for a particular message, that message -will be fed into the processors ` + "`bar` and `baz`" + `. Messages that do not -fail for the processor ` + "`foo`" + ` will skip these processors. - -When messages leave the catch block their fail flags are cleared. This processor -is useful for when it's possible to recover failed messages, or when special -actions (such as logging/metrics) are required before dropping them. - -More information about error handling can be found [here](/docs/configuration/error_handling).`, - Config: docs.FieldProcessor("", "").Array(). 
- LinterFunc(func(ctx docs.LintContext, line, col int, value any) []docs.Lint { - childProcs, ok := value.([]any) - if !ok { - return nil - } - for _, child := range childProcs { - childObj, ok := child.(map[string]any) - if !ok { - continue - } - if _, exists := childObj["catch"]; exists { - // No need to lint as a nested catch will clear errors, - // allowing nested try blocks to work as expected. - return nil - } - if _, exists := childObj["try"]; exists { - return []docs.Lint{ - docs.NewLintError(line, docs.LintCustom, errors.New("`catch` block contains a `try` block which will never execute due to errors only being cleared at the end of the `catch`, for more information about nesting `try` within `catch` read: https://www.benthos.dev/docs/components/processors/try#nesting-within-a-catch-block")), - } - } - } - return nil - }), - }) +`+"```"+` + +If the processor `+"`foo`"+` fails for a particular message, that message will be fed into the processors `+"`bar` and `baz`"+`. Messages that do not fail for the processor `+"`foo`"+` will skip these processors. + +When messages leave the catch block their fail flags are cleared. This processor is useful for when it's possible to recover failed messages, or when special actions (such as logging/metrics) are required before dropping them. + +More information about error handling can be found [here](/docs/configuration/error_handling).`). + LintRule(`if this.or([]).any(pconf -> pconf.type.or("") == "try" || pconf.try.type() == "array" ) { + "'catch' block contains a 'try' block which will never execute due to errors only being cleared at the end of the 'catch', for more information about nesting 'try' within 'catch' read: https://www.benthos.dev/docs/components/processors/try#nesting-within-a-catch-block" +}`). 
+ Field(service.NewProcessorListField("").Default([]any{})), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + mgr := interop.UnwrapManagement(res) + childPubProcs, err := conf.FieldProcessorList() + if err != nil { + return nil, err + } + + childProcs := make([]processor.V1, len(childPubProcs)) + for i, p := range childPubProcs { + childProcs[i] = interop.UnwrapOwnedProcessor(p) + } + + tp, err := newCatch(childProcs) + if err != nil { + return nil, err + } + + p := processor.NewAutoObservedBatchedProcessor("catch", tp, mgr) + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } @@ -88,16 +68,7 @@ type catchProc struct { children []processor.V1 } -func newCatch(conf []processor.Config, mgr bundle.NewManagement) (*catchProc, error) { - var children []processor.V1 - for i, pconf := range conf { - pMgr := mgr.IntoPath("catch", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, err - } - children = append(children, proc) - } +func newCatch(children []processor.V1) (*catchProc, error) { return &catchProc{ children: children, }, nil diff --git a/internal/impl/pure/processor_catch_test.go b/internal/impl/pure/processor_catch_test.go index a40231518e..6c13d24b62 100644 --- a/internal/impl/pure/processor_catch_test.go +++ b/internal/impl/pure/processor_catch_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -16,8 +17,10 @@ import ( ) func TestCatchEmpty(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "catch" + conf, err := processor.FromYAML(` +catch: [] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -47,13 +50,11 @@ func TestCatchEmpty(t *testing.T) { } func TestCatchBasic(t *testing.T) { - 
encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - conf := processor.NewConfig() - conf.Type = "catch" - conf.Catch = append(conf.Catch, encodeConf) + conf, err := processor.FromYAML(` +catch: + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -96,13 +97,11 @@ func TestCatchBasic(t *testing.T) { } func TestCatchFilterSome(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "catch" - conf.Catch = append(conf.Catch, filterConf) + conf, err := processor.FromYAML(` +catch: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -143,17 +142,12 @@ func TestCatchFilterSome(t *testing.T) { } func TestCatchMultiProcs(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "catch" - conf.Catch = append(conf.Catch, filterConf, encodeConf) + conf, err := processor.FromYAML(` +catch: + - bloblang: 'root = if !content().contains("foo") { deleted() }' + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -194,13 +188,11 @@ func TestCatchMultiProcs(t *testing.T) { } func TestCatchNotFails(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = 
"bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - conf := processor.NewConfig() - conf.Type = "catch" - conf.Catch = append(conf.Catch, encodeConf) + conf, err := processor.FromYAML(` +catch: + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -240,13 +232,11 @@ func TestCatchNotFails(t *testing.T) { } func TestCatchFilterAll(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "catch" - conf.Catch = append(conf.Catch, filterConf) + conf, err := processor.FromYAML(` +catch: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_compress.go b/internal/impl/pure/processor_compress.go index 33476ba584..410aa0ec75 100644 --- a/internal/impl/pure/processor_compress.go +++ b/internal/impl/pure/processor_compress.go @@ -2,36 +2,55 @@ package pure import ( "context" + "fmt" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + compressPFieldAlgorithm = "algorithm" + compressPFieldLevel = "level" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newCompress(conf.Compress, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("compress", p, 
mgr), nil - }, docs.ComponentSpec{ - Name: "compress", - Categories: []string{ - "Parsing", - }, - Summary: ` -Compresses messages according to the selected algorithm. Supported compression -algorithms are: gzip, pgzip, zlib, flate, snappy, lz4.`, - Description: ` -The 'level' field might not apply to all algorithms.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("algorithm", "The compression algorithm to use.").HasOptions("gzip", "pgzip", "zlib", "flate", "snappy", "lz4"), - docs.FieldInt("level", "The level of compression to use. May not be applicable to all algorithms."), - ).ChildDefaultAndTypesFromStruct(processor.NewCompressConfig()), - }) + compAlgs := CompressionAlgsList() + err := service.RegisterBatchProcessor( + "compress", service.NewConfigSpec(). + Categories("Parsing"). + Stable(). + Summary(fmt.Sprintf("Compresses messages according to the selected algorithm. Supported compression algorithms are: %v", compAlgs)). + Description(`The 'level' field might not apply to all algorithms.`). + Fields( + service.NewStringEnumField(compressPFieldAlgorithm, compAlgs...). + Description("The compression algorithm to use."). + LintRule(``), + service.NewIntField(compressPFieldLevel). + Description("The level of compression to use. May not be applicable to all algorithms."). 
+ Default(-1), + ), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + algStr, err := conf.FieldString(compressPFieldAlgorithm) + if err != nil { + return nil, err + } + + level, err := conf.FieldInt(compressPFieldLevel) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newCompress(algStr, level, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("compress", p, mgr)), nil + }) if err != nil { panic(err) } @@ -43,13 +62,13 @@ type compressProc struct { log log.Modular } -func newCompress(conf processor.CompressConfig, mgr bundle.NewManagement) (*compressProc, error) { - cor, err := strToCompressFunc(conf.Algorithm) +func newCompress(algStr string, level int, mgr bundle.NewManagement) (*compressProc, error) { + cor, err := strToCompressFunc(algStr) if err != nil { return nil, err } return &compressProc{ - level: conf.Level, + level: level, comp: cor, log: mgr.Logger(), }, nil diff --git a/internal/impl/pure/processor_compress_test.go b/internal/impl/pure/processor_compress_test.go index 2b697c4e51..f08567db0f 100644 --- a/internal/impl/pure/processor_compress_test.go +++ b/internal/impl/pure/processor_compress_test.go @@ -12,6 +12,7 @@ import ( "github.com/klauspost/compress/zlib" "github.com/klauspost/pgzip" "github.com/pierrec/lz4/v4" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -19,20 +20,24 @@ import ( ) func TestCompressBadAlgo(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "does not exist" + conf, err := processor.FromYAML(` +compress: + algorithm: does not exist +`) + require.NoError(t, err) - _, err := mock.NewManager().NewProcessor(conf) + _, err = mock.NewManager().NewProcessor(conf) if err == nil { t.Error("Expected error 
from bad algo") } } func TestCompressGZIP(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "gzip" + conf, err := processor.FromYAML(` +compress: + algorithm: gzip +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -75,9 +80,11 @@ func TestCompressGZIP(t *testing.T) { } func TestCompressPGZIP(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "pgzip" + conf, err := processor.FromYAML(` +compress: + algorithm: pgzip +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -120,9 +127,11 @@ func TestCompressPGZIP(t *testing.T) { } func TestCompressZLIB(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "zlib" + conf, err := processor.FromYAML(` +compress: + algorithm: zlib +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -165,9 +174,11 @@ func TestCompressZLIB(t *testing.T) { } func TestCompressFlate(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "flate" + conf, err := processor.FromYAML(` +compress: + algorithm: flate +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -182,7 +193,7 @@ func TestCompressFlate(t *testing.T) { for i := range input { var buf bytes.Buffer - zw, err := flate.NewWriter(&buf, conf.Compress.Level) + zw, err := flate.NewWriter(&buf, -1) if err != nil { t.Fatal(err) } @@ -213,9 +224,11 @@ func TestCompressFlate(t *testing.T) { } func TestCompressSnappy(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "snappy" + conf, err := processor.FromYAML(` +compress: + algorithm: snappy +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -253,9 +266,11 @@ func TestCompressSnappy(t *testing.T) { } func TestCompressLZ4(t *testing.T) { - 
conf := processor.NewConfig() - conf.Type = "compress" - conf.Compress.Algorithm = "lz4" + conf, err := processor.FromYAML(` +compress: + algorithm: lz4 +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), diff --git a/internal/impl/pure/processor_decompress.go b/internal/impl/pure/processor_decompress.go index c3b21b7405..8217a9b941 100644 --- a/internal/impl/pure/processor_decompress.go +++ b/internal/impl/pure/processor_decompress.go @@ -2,33 +2,45 @@ package pure import ( "context" + "fmt" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + decompressPFieldAlgorithm = "algorithm" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newDecompress(conf.Decompress, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("decompress", p, mgr), nil - }, docs.ComponentSpec{ - Name: "decompress", - Categories: []string{ - "Parsing", - }, - Summary: ` -Decompresses messages according to the selected algorithm. Supported -decompression types are: gzip, pgzip, zlib, bzip2, flate, snappy, lz4.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("algorithm", "The decompression algorithm to use.").HasOptions("gzip", "pgzip", "zlib", "bzip2", "flate", "snappy", "lz4"), - ).ChildDefaultAndTypesFromStruct(processor.NewDecompressConfig()), - }) + compAlgs := DecompressionAlgsList() + err := service.RegisterBatchProcessor( + "decompress", service.NewConfigSpec(). + Categories("Parsing"). + Stable(). 
+ Summary(fmt.Sprintf("Decompresses messages according to the selected algorithm. Supported decompression algorithms are: %v", compAlgs)). + Fields( + service.NewStringEnumField(decompressPFieldAlgorithm, compAlgs...). + Description("The decompression algorithm to use."). + LintRule(``), + ), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + algStr, err := conf.FieldString(compressPFieldAlgorithm) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newDecompress(algStr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("decompress", p, mgr)), nil + }) if err != nil { panic(err) } @@ -39,8 +51,8 @@ type decompressProc struct { log log.Modular } -func newDecompress(conf processor.DecompressConfig, mgr bundle.NewManagement) (*decompressProc, error) { - dcor, err := strToDecompressFunc(conf.Algorithm) +func newDecompress(algStr string, mgr bundle.NewManagement) (*decompressProc, error) { + dcor, err := strToDecompressFunc(algStr) if err != nil { return nil, err } diff --git a/internal/impl/pure/processor_decompress_test.go b/internal/impl/pure/processor_decompress_test.go index 0663bae94a..f606404b78 100644 --- a/internal/impl/pure/processor_decompress_test.go +++ b/internal/impl/pure/processor_decompress_test.go @@ -12,6 +12,7 @@ import ( "github.com/klauspost/compress/zlib" "github.com/klauspost/pgzip" "github.com/pierrec/lz4/v4" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -19,20 +20,24 @@ import ( ) func TestDecompressBadAlgo(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "does not exist" + conf, err := processor.FromYAML(` +decompress: + algorithm: does not exist +`) + require.NoError(t, err) - _, err := 
mock.NewManager().NewProcessor(conf) + _, err = mock.NewManager().NewProcessor(conf) if err == nil { t.Error("Expected error from bad algo") } } func TestDecompressGZIP(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "gzip" + conf, err := processor.FromYAML(` +decompress: + algorithm: gzip +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -77,9 +82,11 @@ func TestDecompressGZIP(t *testing.T) { } func TestDecompressPGZIP(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "pgzip" + conf, err := processor.FromYAML(` +decompress: + algorithm: pgzip +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -124,9 +131,11 @@ func TestDecompressPGZIP(t *testing.T) { } func TestDecompressSnappy(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "snappy" + conf, err := processor.FromYAML(` +decompress: + algorithm: snappy +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -164,9 +173,11 @@ func TestDecompressSnappy(t *testing.T) { } func TestDecompressZLIB(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "zlib" + conf, err := processor.FromYAML(` +decompress: + algorithm: zlib +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -211,9 +222,11 @@ func TestDecompressZLIB(t *testing.T) { } func TestDecompressFlate(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "decompress" - conf.Decompress.Algorithm = "flate" + conf, err := processor.FromYAML(` +decompress: + algorithm: flate +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), @@ -261,9 +274,11 @@ func TestDecompressFlate(t *testing.T) { } func TestDecompressLZ4(t *testing.T) { - conf := processor.NewConfig() - conf.Type = 
"decompress" - conf.Decompress.Algorithm = "lz4" + conf, err := processor.FromYAML(` +decompress: + algorithm: lz4 +`) + require.NoError(t, err) input := [][]byte{ []byte("hello world first part"), diff --git a/internal/impl/pure/processor_dedupe.go b/internal/impl/pure/processor_dedupe.go index ae7a2c4621..368f2c2225 100644 --- a/internal/impl/pure/processor_dedupe.go +++ b/internal/impl/pure/processor_dedupe.go @@ -9,49 +9,42 @@ import ( "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component" "github.com/benthosdev/benthos/v4/internal/component/cache" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newDedupe(conf.Dedupe, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("dedupe", p, mgr), nil - }, docs.ComponentSpec{ - Name: "dedupe", - Categories: []string{ - "Utility", - }, - Summary: `Deduplicates messages by storing a key value in a cache using the ` + "`add`" + ` operator. If the key already exists within the cache it is dropped.`, - Description: ` +const ( + dedupFieldCache = "cache" + dedupFieldKey = "key" + dedupFieldDropOnCacheErr = "drop_on_err" +) + +func dedupeProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary(`Deduplicates messages by storing a key value in a cache using the `+"`add`"+` operator. If the key already exists within the cache it is dropped.`). 
+ Description(` Caches must be configured as resources, for more information check out the [cache documentation here](/docs/components/caches/about). -When using this processor with an output target that might fail you should always wrap the output within an indefinite ` + "[`retry`](/docs/components/outputs/retry)" + ` block. This ensures that during outages your messages aren't reprocessed after failures, which would result in messages being dropped. +When using this processor with an output target that might fail you should always wrap the output within an indefinite `+"[`retry`](/docs/components/outputs/retry)"+` block. This ensures that during outages your messages aren't reprocessed after failures, which would result in messages being dropped. ## Batch Deduplication -This processor enacts on individual messages only, in order to perform a deduplication on behalf of a batch (or window) of messages instead use the ` + "[`cache` processor](/docs/components/processors/cache#examples)" + `. +This processor enacts on individual messages only, in order to perform a deduplication on behalf of a batch (or window) of messages instead use the `+"[`cache` processor](/docs/components/processors/cache#examples)"+`. ## Delivery Guarantees Performing deduplication on a stream using a distributed cache voids any at-least-once guarantees that it previously had. This is because the cache will preserve message signatures even if the message fails to leave the Benthos pipeline, which would cause message loss in the event of an outage at the output sink followed by a restart of the Benthos instance (or a server crash, etc). -This problem can be mitigated by using an in-memory cache and distributing messages to horizontally scaled Benthos pipelines partitioned by the deduplication key. 
However, in situations where at-least-once delivery guarantees are important it is worth avoiding deduplication in favour of implement idempotent behaviour at the edge of your stream pipelines.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("cache", "The [`cache` resource](/docs/components/caches/about) to target with this processor."), - docs.FieldString("key", "An interpolated string yielding the key to deduplicate by for each message.", `${! meta("kafka_key") }`, `${! content().hash("xxhash64") }`).IsInterpolated(), - docs.FieldBool("drop_on_err", "Whether messages should be dropped when the cache returns a general error such as a network issue."), - ).ChildDefaultAndTypesFromStruct(processor.NewDedupeConfig()), - Examples: []docs.AnnotatedExample{ - { - Title: "Deduplicate based on Kafka key", - Summary: "The following configuration demonstrates a pipeline that deduplicates messages based on the Kafka key.", - Config: ` +This problem can be mitigated by using an in-memory cache and distributing messages to horizontally scaled Benthos pipelines partitioned by the deduplication key. However, in situations where at-least-once delivery guarantees are important it is worth avoiding deduplication in favour of implement idempotent behaviour at the edge of your stream pipelines.`). + Example( + "Deduplicate based on Kafka key", + "The following configuration demonstrates a pipeline that deduplicates messages based on the Kafka key.", + ` pipeline: processors: - dedupe: @@ -63,9 +56,45 @@ cache_resources: memory: default_ttl: 60s `, - }, - }, - }) + ). + Fields( + service.NewStringField(dedupFieldCache). + Description("The [`cache` resource](/docs/components/caches/about) to target with this processor."), + service.NewInterpolatedStringField(dedupFieldKey). + Description("An interpolated string yielding the key to deduplicate by for each message."). + Examples(`${! meta("kafka_key") }`, `${! 
content().hash("xxhash64") }`), + service.NewBoolField(dedupFieldDropOnCacheErr). + Description("Whether messages should be dropped when the cache returns a general error such as a network issue."). + Default(true), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "dedupe", dedupeProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + cache, err := conf.FieldString(dedupFieldCache) + if err != nil { + return nil, err + } + + keyStr, err := conf.FieldString(dedupFieldKey) + if err != nil { + return nil, err + } + + dropOnErr, err := conf.FieldBool(dedupFieldDropOnCacheErr) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newDedupe(cache, keyStr, dropOnErr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("dedupe", p, mgr)), nil + }) if err != nil { panic(err) } @@ -80,30 +109,28 @@ type dedupeProc struct { cacheName string } -func newDedupe(conf processor.DedupeConfig, mgr bundle.NewManagement) (*dedupeProc, error) { - if conf.Key == "" { +func newDedupe(cache, keyStr string, dropOnErr bool, mgr bundle.NewManagement) (*dedupeProc, error) { + if keyStr == "" { return nil, errors.New("dedupe key must not be empty") } - key, err := mgr.BloblEnvironment().NewField(conf.Key) + key, err := mgr.BloblEnvironment().NewField(keyStr) if err != nil { return nil, fmt.Errorf("failed to parse key expression: %v", err) } - if !mgr.ProbeCache(conf.Cache) { - return nil, fmt.Errorf("cache resource '%v' was not found", conf.Cache) + if !mgr.ProbeCache(cache) { + return nil, fmt.Errorf("cache resource '%v' was not found", cache) } return &dedupeProc{ log: mgr.Logger(), - dropOnErr: conf.DropOnCacheErr, + dropOnErr: dropOnErr, key: key, mgr: mgr, - cacheName: conf.Cache, + cacheName: cache, }, nil } -//------------------------------------------------------------------------------ - func (d 
*dedupeProc) ProcessBatch(ctx *processor.BatchProcContext, batch message.Batch) ([]message.Batch, error) { newBatch := message.QuickBatch(nil) _ = batch.Iter(func(i int, p *message.Part) error { diff --git a/internal/impl/pure/processor_dedupe_test.go b/internal/impl/pure/processor_dedupe_test.go index ee87316194..d3c062b4b5 100644 --- a/internal/impl/pure/processor_dedupe_test.go +++ b/internal/impl/pure/processor_dedupe_test.go @@ -20,10 +20,12 @@ func TestDedupe(t *testing.T) { mgr := mock.NewManager() mgr.Caches["foocache"] = map[string]mock.CacheItem{} - conf := processor.NewConfig() - conf.Type = "dedupe" - conf.Dedupe.Cache = "foocache" - conf.Dedupe.Key = "${! content() }" + conf, err := processor.FromYAML(` +dedupe: + cache: foocache + key: ${! content() } +`) + require.NoError(t, err) proc, err := mgr.NewProcessor(conf) require.NoError(t, err) @@ -56,20 +58,24 @@ func TestDedupe(t *testing.T) { } func TestDedupeBadCache(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "dedupe" - conf.Dedupe.Cache = "foocache" + conf, err := processor.FromYAML(` +dedupe: + cache: foocache +`) + require.NoError(t, err) mgr := mock.NewManager() - _, err := mgr.NewProcessor(conf) + _, err = mgr.NewProcessor(conf) require.Error(t, err) } func TestDedupeCacheErrors(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "dedupe" - conf.Dedupe.Cache = "foocache" - conf.Dedupe.Key = "${! content() }" + conf, err := processor.FromYAML(` +dedupe: + cache: foocache + key: ${! content() } +`) + require.NoError(t, err) mgr := mock.NewManager() mgr.Caches["foocache"] = map[string]mock.CacheItem{} @@ -83,7 +89,13 @@ func TestDedupeCacheErrors(t *testing.T) { require.NoError(t, err) assert.Len(t, msgs, 0) - conf.Dedupe.DropOnCacheErr = false + conf, err = processor.FromYAML(` +dedupe: + cache: foocache + key: ${! 
content() } + drop_on_err: false +`) + require.NoError(t, err) mgr.Caches["foocache"] = map[string]mock.CacheItem{} proc, err = mgr.NewProcessor(conf) diff --git a/internal/impl/pure/processor_for_each.go b/internal/impl/pure/processor_for_each.go index cb8400dcf8..5a649f83bc 100644 --- a/internal/impl/pure/processor_for_each.go +++ b/internal/impl/pure/processor_for_each.go @@ -2,40 +2,44 @@ package pure import ( "context" - "fmt" - "strconv" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newForEach(conf.ForEach, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("for_each", p, mgr), nil - }, docs.ComponentSpec{ - Name: "for_each", - Categories: []string{ - "Composition", - }, - Summary: ` -A processor that applies a list of child processors to messages of a batch as -though they were each a batch of one message.`, - Description: ` -This is useful for forcing batch wide processors such as -` + "[`dedupe`](/docs/components/processors/dedupe)" + ` or interpolations such -as the ` + "`value`" + ` field of the ` + "`metadata`" + ` processor to execute -on individual message parts of a batch instead. + err := service.RegisterBatchProcessor("for_each", service.NewConfigSpec(). + Stable(). + Categories("Composition"). + Summary("A processor that applies a list of child processors to messages of a batch as though they were each a batch of one message."). 
+ Description(` +This is useful for forcing batch wide processors such as `+"[`dedupe`](/docs/components/processors/dedupe)"+` or interpolations such as the `+"`value`"+` field of the `+"`metadata`"+` processor to execute on individual message parts of a batch instead. -Please note that most processors already process per message of a batch, and -this processor is not needed in those cases.`, - Config: docs.FieldProcessor("", "").Array().HasDefault([]any{}), - }) +Please note that most processors already process per message of a batch, and this processor is not needed in those cases.`). + Field(service.NewProcessorListField("").Default([]any{})), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + mgr := interop.UnwrapManagement(res) + childPubProcs, err := conf.FieldProcessorList() + if err != nil { + return nil, err + } + + childProcs := make([]processor.V1, len(childPubProcs)) + for i, p := range childPubProcs { + childProcs[i] = interop.UnwrapOwnedProcessor(p) + } + + tp, err := newForEach(childProcs, mgr) + if err != nil { + return nil, err + } + + p := processor.NewAutoObservedBatchedProcessor("for_each", tp, mgr) + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } @@ -45,16 +49,7 @@ type forEachProc struct { children []processor.V1 } -func newForEach(conf []processor.Config, mgr bundle.NewManagement) (*forEachProc, error) { - var children []processor.V1 - for i, pconf := range conf { - pMgr := mgr.IntoPath("for_each", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, fmt.Errorf("child processor [%v]: %w", i, err) - } - children = append(children, proc) - } +func newForEach(children []processor.V1, mgr bundle.NewManagement) (*forEachProc, error) { return &forEachProc{children: children}, nil } diff --git a/internal/impl/pure/processor_for_each_test.go b/internal/impl/pure/processor_for_each_test.go index dd0eb8643d..40d690178b 100644 --- 
a/internal/impl/pure/processor_for_each_test.go +++ b/internal/impl/pure/processor_for_each_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -17,8 +18,10 @@ import ( //------------------------------------------------------------------------------ func TestForEachEmpty(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "for_each" + conf, err := processor.FromYAML(` +for_each: [] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -42,13 +45,11 @@ func TestForEachEmpty(t *testing.T) { } func TestForEachBasic(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - conf := processor.NewConfig() - conf.Type = "for_each" - conf.ForEach = append(conf.ForEach, encodeConf) + conf, err := processor.FromYAML(` +for_each: + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -79,13 +80,11 @@ func TestForEachBasic(t *testing.T) { } func TestForEachFilterSome(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "for_each" - conf.ForEach = append(conf.ForEach, filterConf) + conf, err := processor.FromYAML(` +for_each: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -115,17 +114,12 @@ func TestForEachFilterSome(t *testing.T) { } func TestForEachMultiProcs(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = 
"bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "for_each" - conf.ForEach = append(conf.ForEach, filterConf, encodeConf) + conf, err := processor.FromYAML(` +for_each: + - bloblang: 'root = if !content().contains("foo") { deleted() }' + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -155,13 +149,11 @@ func TestForEachMultiProcs(t *testing.T) { } func TestForEachFilterAll(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "for_each" - conf.ForEach = append(conf.ForEach, filterConf) + conf, err := processor.FromYAML(` +for_each: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_grok.go b/internal/impl/pure/processor_grok.go index 994096db43..a3c22cebf3 100644 --- a/internal/impl/pure/processor_grok.go +++ b/internal/impl/pure/processor_grok.go @@ -11,60 +11,54 @@ import ( "github.com/Jeffail/grok" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/filepath" "github.com/benthosdev/benthos/v4/internal/filepath/ifs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() 
{ - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newGrok(conf.Grok, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("grok", p, mgr), nil - }, docs.ComponentSpec{ - Name: "grok", - Categories: []string{ - "Parsing", - }, - Summary: ` -Parses messages into a structured format by attempting to apply a list of Grok expressions, the first expression to result in at least one value replaces the original message with a JSON object containing the values.`, - Description: ` -Type hints within patterns are respected, therefore with the pattern ` + "`%{WORD:first},%{INT:second:int}`" + ` and a payload of ` + "`foo,1`" + ` the resulting payload would be ` + "`{\"first\":\"foo\",\"second\":1}`" + `. +const ( + gpFieldExpressions = "expressions" + gpFieldRemoveEmpty = "remove_empty_values" + gpFieldNamedOnly = "named_captures_only" + gpFieldUseDefaults = "use_default_patterns" + gpFieldPatternPaths = "pattern_paths" + gpFieldPatternDefinitions = "pattern_definitions" +) + +func grokProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Parsing"). + Stable(). + Summary("Parses messages into a structured format by attempting to apply a list of Grok expressions, the first expression to result in at least one value replaces the original message with a JSON object containing the values."). + Description(` +Type hints within patterns are respected, therefore with the pattern `+"`%{WORD:first},%{INT:second:int}`"+` and a payload of `+"`foo,1`"+` the resulting payload would be `+"`{\"first\":\"foo\",\"second\":1}`"+`. ### Performance -This processor currently uses the [Go RE2](https://golang.org/s/re2syntax) regular expression engine, which is guaranteed to run in time linear to the size of the input. However, this property often makes it less performant than PCRE based implementations of grok. 
For more information see [https://swtch.com/~rsc/regexp/regexp1.html](https://swtch.com/~rsc/regexp/regexp1.html).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("expressions", "One or more Grok expressions to attempt against incoming messages. The first expression to match at least one value will be used to form a result.").Array(), - docs.FieldString("pattern_definitions", "A map of pattern definitions that can be referenced within `patterns`.").Map(), - docs.FieldString("pattern_paths", "A list of paths to load Grok patterns from. This field supports wildcards, including super globs (double star).").Array(), - docs.FieldBool("named_captures_only", "Whether to only capture values from named patterns.").Advanced(), - docs.FieldBool("use_default_patterns", "Whether to use a [default set of patterns](#default-patterns).").Advanced(), - docs.FieldBool("remove_empty_values", "Whether to remove values that are empty from the resulting structure.").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewGrokConfig()), - Examples: []docs.AnnotatedExample{ - { - Title: "VPC Flow Logs", - Summary: ` +This processor currently uses the [Go RE2](https://golang.org/s/re2syntax) regular expression engine, which is guaranteed to run in time linear to the size of the input. However, this property often makes it less performant than PCRE based implementations of grok. For more information see [https://swtch.com/~rsc/regexp/regexp1.html](https://swtch.com/~rsc/regexp/regexp1.html).`). + Footnotes(` +## Default Patterns + +A summary of the default patterns on offer can be [found here](https://github.com/Jeffail/grok/blob/master/patterns.go#L5).`). 
+ Example("VPC Flow Logs", ` Grok can be used to parse unstructured logs such as VPC flow logs that look like this: -` + "```text" + ` +`+"```text"+` 2 123456789010 eni-1235b8ca123456789 172.31.16.139 172.31.16.21 20641 22 6 20 4249 1418530010 1418530070 ACCEPT OK -` + "```" + ` +`+"```"+` Into structured objects that look like this: -` + "```json" + ` +`+"```json"+` {"accountid":"123456789010","action":"ACCEPT","bytes":4249,"dstaddr":"172.31.16.21","dstport":22,"end":1418530070,"interfaceid":"eni-1235b8ca123456789","logstatus":"OK","packets":20,"protocol":6,"srcaddr":"172.31.16.139","srcport":20641,"start":1418530010,"version":2} -` + "```" + ` +`+"```"+` With the following config:`, - Config: ` + ` pipeline: processors: - grok: @@ -73,13 +67,74 @@ pipeline: pattern_definitions: VPCFLOWLOG: '%{NUMBER:version:int} %{NUMBER:accountid} %{NOTSPACE:interfaceid} %{NOTSPACE:srcaddr} %{NOTSPACE:dstaddr} %{NOTSPACE:srcport:int} %{NOTSPACE:dstport:int} %{NOTSPACE:protocol:int} %{NOTSPACE:packets:int} %{NOTSPACE:bytes:int} %{NUMBER:start:int} %{NUMBER:end:int} %{NOTSPACE:action} %{NOTSPACE:logstatus}' `, - }, - }, - Footnotes: ` -## Default Patterns + ). + Fields( + service.NewStringListField(gpFieldExpressions). + Description("One or more Grok expressions to attempt against incoming messages. The first expression to match at least one value will be used to form a result."), + service.NewStringMapField(gpFieldPatternDefinitions). + Description("A map of pattern definitions that can be referenced within `patterns`."). + Default(map[string]any{}), + service.NewStringListField(gpFieldPatternPaths). + Description("A list of paths to load Grok patterns from. This field supports wildcards, including super globs (double star)."). + Default([]any{}), + service.NewBoolField(gpFieldNamedOnly). + Description("Whether to only capture values from named patterns."). + Advanced(). + Default(true), + service.NewBoolField(gpFieldUseDefaults). 
+ Description("Whether to use a [default set of patterns](#default-patterns)."). + Advanced(). + Default(true), + service.NewBoolField(gpFieldRemoveEmpty). + Description("Whether to remove values that are empty from the resulting structure."). + Advanced(). + Default(true), + ) +} + +type grokProcConfig struct { + Expressions []string + RemoveEmpty bool + NamedOnly bool + UseDefaults bool + PatternPaths []string + PatternDefinitions map[string]string +} -A summary of the default patterns on offer can be [found here](https://github.com/Jeffail/grok/blob/master/patterns.go#L5).`, - }) +func init() { + err := service.RegisterBatchProcessor( + "grok", grokProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + var g grokProcConfig + var err error + + if g.Expressions, err = conf.FieldStringList(gpFieldExpressions); err != nil { + return nil, err + } + if g.PatternDefinitions, err = conf.FieldStringMap(gpFieldPatternDefinitions); err != nil { + return nil, err + } + if g.PatternPaths, err = conf.FieldStringList(gpFieldPatternPaths); err != nil { + return nil, err + } + + if g.RemoveEmpty, err = conf.FieldBool(gpFieldRemoveEmpty); err != nil { + return nil, err + } + if g.NamedOnly, err = conf.FieldBool(gpFieldNamedOnly); err != nil { + return nil, err + } + if g.UseDefaults, err = conf.FieldBool(gpFieldUseDefaults); err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newGrok(g, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("grok", p, mgr)), nil + }) if err != nil { panic(err) } @@ -90,7 +145,7 @@ type grokProc struct { log log.Modular } -func newGrok(conf processor.GrokConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { +func newGrok(conf grokProcConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { grokConf := grok.Config{ RemoveEmptyValues: conf.RemoveEmpty, 
NamedCapturesOnly: conf.NamedOnly, diff --git a/internal/impl/pure/processor_grok_test.go b/internal/impl/pure/processor_grok_test.go index 3eb02bd4fc..ace3480964 100644 --- a/internal/impl/pure/processor_grok_test.go +++ b/internal/impl/pure/processor_grok_test.go @@ -2,11 +2,13 @@ package pure_test import ( "context" + "fmt" "os" "path/filepath" "reflect" "testing" + "github.com/Jeffail/gabs/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -16,11 +18,12 @@ import ( ) func TestGrokAllParts(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "grok" - conf.Grok.Expressions = []string{ - "%{WORD:first},%{INT:second:int}", - } + conf, err := processor.FromYAML(` +grok: + expressions: + - "%{WORD:first},%{INT:second:int}" +`) + require.NoError(t, err) gSet, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -57,7 +60,7 @@ func TestGrok(t *testing.T) { pattern string input string output string - definitions map[string]string + definitions map[string]any } tests := []gTest{ @@ -69,7 +72,7 @@ func TestGrok(t *testing.T) { }, { name: "Test pattern definitions", - definitions: map[string]string{ + definitions: map[string]any{ "ACTION": "(pass|deny)", }, input: `pass connection from 127.0.0.1`, @@ -86,13 +89,20 @@ func TestGrok(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "grok" - conf.Grok.Expressions = []string{test.pattern} - conf.Grok.PatternDefinitions = test.definitions + if test.definitions == nil { + test.definitions = map[string]any{} + } + ts := fmt.Sprintf(` +grok: + expressions: + - '%v' + pattern_definitions: %v +`, test.pattern, gabs.Wrap(test.definitions).String()) + conf, err := processor.FromYAML(ts) + require.NoError(t, err, ts) gSet, err := mock.NewManager().NewProcessor(conf) - require.NoError(t, err) + require.NoError(t, err, ts) inMsg := message.QuickBatch([][]byte{[]byte(test.input)}) msgs, _ := 
gSet.ProcessBatch(context.Background(), inMsg) @@ -104,10 +114,17 @@ func TestGrok(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "grok" - conf.Grok.Expressions = []string{test.pattern} - conf.Grok.PatternDefinitions = test.definitions + if test.definitions == nil { + test.definitions = map[string]any{} + } + ts := fmt.Sprintf(` +grok: + expressions: + - '%v' + pattern_definitions: %v +`, test.pattern, gabs.Wrap(test.definitions).String()) + conf, err := processor.FromYAML(ts) + require.NoError(t, err) gSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -130,10 +147,14 @@ FOONESTED %{INT:nested.first:int} %{WORD:nested.second} %{WORD:nested.third} `), 0o777) require.NoError(t, err) - conf := processor.NewConfig() - conf.Type = "grok" - conf.Grok.Expressions = []string{`%{FOONESTED}`, `%{FOOFLAT}`} - conf.Grok.PatternPaths = []string{tmpDir} + conf, err := processor.FromYAML(fmt.Sprintf(` +grok: + expressions: + - "%%{FOONESTED}" + - "%%{FOOFLAT}" + pattern_paths: [ %v ] +`, tmpDir)) + require.NoError(t, err) gSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) diff --git a/internal/impl/pure/processor_group_by.go b/internal/impl/pure/processor_group_by.go index a158862e11..6c5d1ccd78 100644 --- a/internal/impl/pure/processor_group_by.go +++ b/internal/impl/pure/processor_group_by.go @@ -2,41 +2,36 @@ package pure import ( "context" - "errors" "fmt" "strconv" "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := 
bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newGroupBy(conf.GroupBy, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("group_by", p, mgr), nil - }, docs.ComponentSpec{ - Name: "group_by", - Categories: []string{ - "Composition", - }, - Summary: ` -Splits a [batch of messages](/docs/configuration/batching) into N batches, where each resulting batch contains a group of messages determined by a [Bloblang query](/docs/guides/bloblang/about).`, - Description: ` +const ( + gbpFieldCheck = "check" + gbpFieldProcessors = "processors" +) + +func groupByProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary(`Splits a [batch of messages](/docs/configuration/batching) into N batches, where each resulting batch contains a group of messages determined by a [Bloblang query](/docs/guides/bloblang/about).`). + Description(` Once the groups are established a list of processors are applied to their respective grouped batch, which can be used to label the batch as per their grouping. Messages that do not pass the check of any specified group are placed in their own group. -The functionality of this processor depends on being applied across messages that are batched. You can find out more about batching [in this doc](/docs/configuration/batching).`, - Examples: []docs.AnnotatedExample{ - { - Title: "Grouped Processing", - Summary: "Imagine we have a batch of messages that we wish to split into a group of foos and everything else, which should be sent to different output destinations based on those groupings. We also need to send the foos as a tar gzip archive. For this purpose we can use the `group_by` processor with a [`switch`](/docs/components/outputs/switch) output:", - Config: ` +The functionality of this processor depends on being applied across messages that are batched. 
You can find out more about batching [in this doc](/docs/configuration/batching).`). + Example( + "Grouped Processing", + "Imagine we have a batch of messages that we wish to split into a group of foos and everything else, which should be sent to different output destinations based on those groupings. We also need to send the foos as a tar gzip archive. For this purpose we can use the `group_by` processor with a [`switch`](/docs/components/outputs/switch) output:", + ` pipeline: processors: - group_by: @@ -61,22 +56,41 @@ output: project: somewhere_else topic: no_foos_here `, - }, - }, - Config: docs.FieldComponent().Array().WithChildren( - docs.FieldBloblang( - "check", - "A [Bloblang query](/docs/guides/bloblang/about) that should return a boolean value indicating whether a message belongs to a given group.", - `this.type == "foo"`, - `this.contents.urls.contains("https://benthos.dev/")`, - `true`, - ).HasDefault(""), - docs.FieldProcessor( - "processors", - "A list of [processors](/docs/components/processors/about) to execute on the newly formed group.", - ).HasDefault([]any{}).Array(), - ), - }) + ). + Field(service.NewObjectListField("", + service.NewBloblangField(gbpFieldCheck). + Description("A [Bloblang query](/docs/guides/bloblang/about) that should return a boolean value indicating whether a message belongs to a given group."). + Examples( + `this.type == "foo"`, + `this.contents.urls.contains("https://benthos.dev/")`, + `true`, + ), + service.NewProcessorListField(gbpFieldProcessors). + Description("A list of [processors](/docs/components/processors/about) to execute on the newly formed group."). 
+ Default([]any{}), + )) +} + +func init() { + err := service.RegisterBatchProcessor( + "group_by", groupByProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + groupConfs, err := conf.FieldObjectList() + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p := &groupByProc{log: mgr.Logger()} + p.groups = make([]group, len(groupConfs)) + for i, c := range groupConfs { + if p.groups[i], err = groupFromParsed(c, mgr); err != nil { + return nil, fmt.Errorf("group '%v' parse error: %w", i, err) + } + } + + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("group_by", p, mgr)), nil + }) if err != nil { panic(err) } @@ -87,38 +101,30 @@ type group struct { Processors []processor.V1 } -type groupByProc struct { - log log.Modular - groups []group -} - -func newGroupBy(conf processor.GroupByConfig, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { - var err error - groups := make([]group, len(conf)) +func groupFromParsed(conf *service.ParsedConfig, mgr bundle.NewManagement) (g group, err error) { + var checkStr string + if checkStr, err = conf.FieldString(gbpFieldCheck); err != nil { + return + } + if g.Check, err = mgr.BloblEnvironment().NewMapping(checkStr); err != nil { + return + } - for i, gConf := range conf { - if len(gConf.Check) > 0 { - if groups[i].Check, err = mgr.BloblEnvironment().NewMapping(gConf.Check); err != nil { - return nil, fmt.Errorf("failed to parse check for group '%v': %v", i, err) - } - } else { - return nil, errors.New("a group definition must have a check query") - } + var iProcs []*service.OwnedProcessor + if iProcs, err = conf.FieldProcessorList(gbpFieldProcessors); err != nil { + return + } - for j, pConf := range gConf.Processors { - pMgr := mgr.IntoPath("group_by", strconv.Itoa(i), "processors", strconv.Itoa(j)) - proc, err := pMgr.NewProcessor(pConf) - if err != nil { - return nil, err - } - 
groups[i].Processors = append(groups[i].Processors, proc) - } + g.Processors = make([]processor.V1, len(iProcs)) + for i, c := range iProcs { + g.Processors[i] = interop.UnwrapOwnedProcessor(c) } + return +} - return &groupByProc{ - log: mgr.Logger(), - groups: groups, - }, nil +type groupByProc struct { + log log.Modular + groups []group } func (g *groupByProc) ProcessBatch(ctx *processor.BatchProcContext, msg message.Batch) ([]message.Batch, error) { diff --git a/internal/impl/pure/processor_group_by_test.go b/internal/impl/pure/processor_group_by_test.go index e473b85675..50f5b2b391 100644 --- a/internal/impl/pure/processor_group_by_test.go +++ b/internal/impl/pure/processor_group_by_test.go @@ -6,7 +6,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -16,36 +15,18 @@ import ( ) func TestGroupBy(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "group_by" - - procConf := processor.NewConfig() - require.NoError(t, yaml.Unmarshal([]byte(` -archive: - format: lines`), &procConf)) - - conf.GroupBy = append(conf.GroupBy, processor.GroupByElement{ - Check: `content().contains("foo")`, - Processors: []processor.Config{ - procConf, - }, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = content().uppercase()` - - procConf2 := processor.NewConfig() - procConf2.Type = "bloblang" - procConf2.Bloblang = `root = content().trim()` - - conf.GroupBy = append(conf.GroupBy, processor.GroupByElement{ - Check: `content().contains("bar")`, - Processors: []processor.Config{ - procConf, - procConf2, - }, - }) + conf, err := processor.FromYAML(` +group_by: + - check: 'content().contains("foo")' + processors: + - archive: + format: lines + - check: 'content().contains("bar")' + processors: + - bloblang: 'root = content().uppercase()' + - 
bloblang: 'root = content().trim()' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -88,21 +69,15 @@ archive: } func TestGroupByErrs(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "group_by" - - procConf := processor.NewConfig() - require.NoError(t, yaml.Unmarshal([]byte(` -archive: - format: lines`), &procConf)) - - conf.GroupBy = append(conf.GroupBy, processor.GroupByElement{ - Processors: []processor.Config{ - procConf, - }, - }) + conf, err := processor.FromYAML(` +group_by: + - processors: + - archive: + format: lines +`) + require.NoError(t, err) - _, err := mock.NewManager().NewProcessor(conf) + _, err = mock.NewManager().NewProcessor(conf) require.Error(t, err) - require.Contains(t, err.Error(), "a group definition must have a check query") + require.Contains(t, err.Error(), "field 'check' is required") } diff --git a/internal/impl/pure/processor_group_by_value.go b/internal/impl/pure/processor_group_by_value.go index 5b8d669cc9..c21f132b55 100644 --- a/internal/impl/pure/processor_group_by_value.go +++ b/internal/impl/pure/processor_group_by_value.go @@ -6,37 +6,33 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/field" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + gbvpFieldValue = "value" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newGroupByValue(conf.GroupByValue, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("group_by_value", p, mgr), nil - }, docs.ComponentSpec{ - Name: 
"group_by_value", - Categories: []string{ - "Composition", - }, - Summary: `Splits a batch of messages into N batches, where each resulting batch contains a group of messages determined by a [function interpolated string](/docs/configuration/interpolation#bloblang-queries) evaluated per message.`, - Description: ` + err := service.RegisterBatchProcessor( + "group_by_value", service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary(`Splits a batch of messages into N batches, where each resulting batch contains a group of messages determined by a [function interpolated string](/docs/configuration/interpolation#bloblang-queries) evaluated per message.`). + Description(` This allows you to group messages using arbitrary fields within their content or metadata, process them individually, and send them to unique locations as per their group. -The functionality of this processor depends on being applied across messages that are batched. You can find out more about batching [in this doc](/docs/configuration/batching).`, - Footnotes: ` +The functionality of this processor depends on being applied across messages that are batched. You can find out more about batching [in this doc](/docs/configuration/batching).`). + Footnotes(` ## Examples -If we were consuming Kafka messages and needed to group them by their key, -archive the groups, and send them to S3 with the key as part of the path we -could achieve that with the following: +If we were consuming Kafka messages and needed to group them by their key, archive the groups, and send them to S3 with the key as part of the path we could achieve that with the following: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - group_by_value: @@ -49,14 +45,23 @@ output: aws_s3: bucket: TODO path: docs/${! meta("kafka_key") }/${! count("files") }-${! 
timestamp_unix_nano() }.tar.gz -` + "```" + ``, - Config: docs.FieldComponent().WithChildren( - docs.FieldString( - "value", "The interpolated string to group based on.", - "${! meta(\"kafka_key\") }", "${! json(\"foo.bar\") }-${! meta(\"baz\") }", - ).IsInterpolated().HasDefault(""), - ), - }) +`+"```"+``). + Field(service.NewInterpolatedStringField(gbvpFieldValue). + Description("The interpolated string to group based on."). + Examples("${! meta(\"kafka_key\") }", "${! json(\"foo.bar\") }-${! meta(\"baz\") }")), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + valueStr, err := conf.FieldString(gbvpFieldValue) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newGroupByValue(valueStr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("group_by_value", p, mgr)), nil + }) if err != nil { panic(err) } @@ -67,8 +72,8 @@ type groupByValueProc struct { value *field.Expression } -func newGroupByValue(conf processor.GroupByValueConfig, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { - value, err := mgr.BloblEnvironment().NewField(conf.Value) +func newGroupByValue(valueStr string, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { + value, err := mgr.BloblEnvironment().NewField(valueStr) if err != nil { return nil, fmt.Errorf("failed to parse value expression: %v", err) } diff --git a/internal/impl/pure/processor_group_by_value_test.go b/internal/impl/pure/processor_group_by_value_test.go index 818d07aac8..b15f09fef6 100644 --- a/internal/impl/pure/processor_group_by_value_test.go +++ b/internal/impl/pure/processor_group_by_value_test.go @@ -5,6 +5,8 @@ import ( "reflect" "testing" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" 
"github.com/benthosdev/benthos/v4/internal/message" @@ -13,9 +15,11 @@ import ( ) func TestGroupByValueBasic(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "group_by_value" - conf.GroupByValue.Value = "${!json(\"foo\")}" + conf, err := processor.FromYAML(` +group_by_value: + value: ${!json("foo")} +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_insert_part.go b/internal/impl/pure/processor_insert_part.go index bda24530ef..650dec019b 100644 --- a/internal/impl/pure/processor_insert_part.go +++ b/internal/impl/pure/processor_insert_part.go @@ -6,44 +6,60 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/field" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) +const ( + ippFieldIndex = "index" + ippFieldContent = "content" +) + +func insertPartSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary("Insert a new message into a batch at an index. If the specified index is greater than the length of the existing batch it will be appended to the end."). + Description(` +The index can be negative, and if so the message will be inserted from the end counting backwards starting from -1. E.g. if index = -1 then the new message will become the last of the batch, if index = -2 then the new message will be inserted before the last message, and so on. If the negative index is greater than the length of the existing batch it will be inserted at the beginning. + +The new message will have metadata copied from the first pre-existing message of the batch. 
+ +This processor will interpolate functions within the 'content' field, you can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`). + Fields( + service.NewIntField(ippFieldIndex). + Description("The index within the batch to insert the message at."). + Default(-1), + service.NewInterpolatedStringField(ippFieldContent). + Description("The content of the message being inserted."). + Default(""), + ) +} + func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newInsertPart(conf.InsertPart, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("insert_part", p, mgr), nil - }, docs.ComponentSpec{ - Name: "insert_part", - Categories: []string{ - "Composition", - }, - Summary: ` -Insert a new message into a batch at an index. If the specified index is greater -than the length of the existing batch it will be appended to the end.`, - Description: ` -The index can be negative, and if so the message will be inserted from the end -counting backwards starting from -1. E.g. if index = -1 then the new message -will become the last of the batch, if index = -2 then the new message will be -inserted before the last message, and so on. If the negative index is greater -than the length of the existing batch it will be inserted at the beginning. - -The new message will have metadata copied from the first pre-existing message of -the batch. 
- -This processor will interpolate functions within the 'content' field, you can -find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldInt("index", "The index within the batch to insert the message at."), - docs.FieldString("content", "The content of the message being inserted.").IsInterpolated(), - ).ChildDefaultAndTypesFromStruct(processor.NewInsertPartConfig()), - }) + err := service.RegisterBatchProcessor( + "insert_part", insertPartSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + index, err := conf.FieldInt(ippFieldIndex) + if err != nil { + return nil, err + } + + contentStr, err := conf.FieldString(ippFieldContent) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newInsertPart(index, contentStr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("insert_part", p, mgr)), nil + }) if err != nil { panic(err) } @@ -55,14 +71,14 @@ type insertPart struct { log log.Modular } -func newInsertPart(conf processor.InsertPartConfig, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { - part, err := mgr.BloblEnvironment().NewField(conf.Content) +func newInsertPart(index int, contentStr string, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { + part, err := mgr.BloblEnvironment().NewField(contentStr) if err != nil { return nil, fmt.Errorf("failed to parse content expression: %v", err) } return &insertPart{ part: part, - index: conf.Index, + index: index, log: mgr.Logger(), }, nil } diff --git a/internal/impl/pure/processor_insert_part_test.go b/internal/impl/pure/processor_insert_part_test.go index 6f583d00d4..7154fdecbf 100644 --- a/internal/impl/pure/processor_insert_part_test.go +++ b/internal/impl/pure/processor_insert_part_test.go @@ -5,21 +5,26 @@ import ( "fmt" 
"os" "reflect" + "strconv" "testing" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" "github.com/benthosdev/benthos/v4/internal/message" ) func TestInsertBoundaries(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "insert_part" - conf.InsertPart.Content = "hello world" - for i := 0; i < 10; i++ { for j := -5; j <= 5; j++ { - conf.InsertPart.Index = j + conf, err := processor.FromYAML(fmt.Sprintf(` +insert_part: + content: hello world + index: %v +`, j)) + require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Error(err) @@ -45,10 +50,6 @@ func TestInsertBoundaries(t *testing.T) { } func TestInsertPart(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "insert_part" - conf.InsertPart.Content = "hello world" - type test struct { index int in [][]byte @@ -162,7 +163,13 @@ func TestInsertPart(t *testing.T) { } for _, test := range tests { - conf.InsertPart.Index = test.index + conf, err := processor.FromYAML(` +insert_part: + content: hello world + index: ` + strconv.Itoa(test.index) + ` +`) + require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Error(err) @@ -182,9 +189,11 @@ func TestInsertPart(t *testing.T) { } func TestInsertPartInterpolation(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "insert_part" - conf.InsertPart.Content = "hello ${!hostname()} world" + conf, err := processor.FromYAML(` +insert_part: + content: 'hello ${!hostname()} world' +`) + require.NoError(t, err) hostname, _ := os.Hostname() diff --git a/internal/impl/pure/processor_jmespath.go b/internal/impl/pure/processor_jmespath.go index a5152c40f1..bffdcfc705 100644 --- a/internal/impl/pure/processor_jmespath.go +++ b/internal/impl/pure/processor_jmespath.go @@ -8,39 +8,31 @@ import ( jmespath "github.com/jmespath/go-jmespath" 
"github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newJMESPath(conf.JMESPath, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("jmespath", p, mgr), nil - }, docs.ComponentSpec{ - Name: "jmespath", - Categories: []string{ - "Mapping", - }, - Summary: ` -Executes a [JMESPath query](http://jmespath.org/) on JSON documents and replaces -the message with the resulting document.`, - Description: ` +const ( + jmpFieldQuery = "query" +) + +func jmpProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Mapping"). + Stable(). + Summary("Executes a [JMESPath query](http://jmespath.org/) on JSON documents and replaces the message with the resulting document."). + Description(` :::note Try out Bloblang -For better performance and improved capabilities try out native Benthos mapping with the [` + "`mapping`" + ` processor](/docs/components/processors/mapping). +For better performance and improved capabilities try out native Benthos mapping with the [`+"`mapping`"+` processor](/docs/components/processors/mapping). ::: -`, - Examples: []docs.AnnotatedExample{ - { - Title: "Mapping", - Summary: ` +`). 
+ Example("Mapping", ` When receiving JSON documents of the form: -` + "```json" + ` +`+"```json"+` { "locations": [ {"name": "Seattle", "state": "WA"}, @@ -49,27 +41,43 @@ When receiving JSON documents of the form: {"name": "Olympia", "state": "WA"} ] } -` + "```" + ` +`+"```"+` -We could collapse the location names from the state of Washington into a field ` + "`Cities`" + `: +We could collapse the location names from the state of Washington into a field `+"`Cities`"+`: -` + "```json" + ` +`+"```json"+` {"Cities": "Bellevue, Olympia, Seattle"} -` + "```" + ` +`+"```"+` With the following config:`, - Config: ` + ` pipeline: processors: - jmespath: query: "locations[?state == 'WA'].name | sort(@) | {Cities: join(', ', @)}" `, - }, - }, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("query", "The JMESPath query to apply to messages.").HasDefault(""), - ), - }) + ). + Field(service.NewStringField(jmpFieldQuery). + Description("The JMESPath query to apply to messages.")) +} + +func init() { + err := service.RegisterBatchProcessor( + "jmespath", jmpProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + queryStr, err := conf.FieldString(jmpFieldQuery) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newJMESPath(queryStr, mgr) + if err != nil { + return nil, err + } + + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("jmespath", p, mgr)), nil + }) if err != nil { panic(err) } @@ -80,8 +88,8 @@ type jmespathProc struct { log log.Modular } -func newJMESPath(conf processor.JMESPathConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { - query, err := jmespath.Compile(conf.Query) +func newJMESPath(queryStr string, mgr bundle.NewManagement) (processor.AutoObserved, error) { + query, err := jmespath.Compile(queryStr) if err != nil { return nil, fmt.Errorf("failed to compile JMESPath query: %v", err) } diff --git 
a/internal/impl/pure/processor_jmespath_test.go b/internal/impl/pure/processor_jmespath_test.go index f647534218..260442901b 100644 --- a/internal/impl/pure/processor_jmespath_test.go +++ b/internal/impl/pure/processor_jmespath_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/Jeffail/gabs/v2" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -13,9 +14,11 @@ import ( ) func TestJMESPathAllParts(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jmespath" - conf.JMESPath.Query = "foo.bar" + conf, err := processor.FromYAML(` +jmespath: + query: foo.bar +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -42,9 +45,11 @@ func TestJMESPathAllParts(t *testing.T) { } func TestJMESPathValidation(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jmespath" - conf.JMESPath.Query = "foo.bar" + conf, err := processor.FromYAML(` +jmespath: + query: foo.bar +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -65,9 +70,11 @@ func TestJMESPathValidation(t *testing.T) { } func TestJMESPathMutation(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jmespath" - conf.JMESPath.Query = "{foo: merge(foo, {bar:'baz'})}" + conf, err := processor.FromYAML(` +jmespath: + query: "{foo: merge(foo, {bar:'baz'})}" +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -156,9 +163,11 @@ func TestJMESPath(t *testing.T) { } for _, test := range tests { - conf := processor.NewConfig() - conf.Type = "jmespath" - conf.JMESPath.Query = test.path + conf, err := processor.FromYAML(` +jmespath: + query: "` + test.path + `" +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_jq.go b/internal/impl/pure/processor_jq.go index 
f4fe5466a0..2fc0c4da42 100644 --- a/internal/impl/pure/processor_jq.go +++ b/internal/impl/pure/processor_jq.go @@ -9,66 +9,50 @@ import ( "github.com/itchyny/gojq" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newJQ(conf.JQ, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("jq", p, mgr), nil - }, docs.ComponentSpec{ - Name: "jq", - Status: docs.StatusStable, - Categories: []string{ - "Mapping", - }, - Summary: ` -Transforms and filters messages using jq queries.`, - Description: ` +const ( + jqpFieldQuery = "query" + jqpFieldRaw = "raw" + jqpFieldOutputRaw = "output_raw" +) + +func jqProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Mapping"). + Stable(). + Summary("Transforms and filters messages using jq queries."). + Description(` :::note Try out Bloblang -For better performance and improved capabilities try out native Benthos mapping with the [` + "`mapping`" + ` processor](/docs/components/processors/mapping). +For better performance and improved capabilities try out native Benthos mapping with the [`+"`mapping`"+` processor](/docs/components/processors/mapping). ::: -The provided query is executed on each message, targeting either the contents -as a structured JSON value or as a raw string using the field ` + "`raw`" + `, -and the message is replaced with the query result. 
+The provided query is executed on each message, targeting either the contents as a structured JSON value or as a raw string using the field `+"`raw`"+`, and the message is replaced with the query result. -Message metadata is also accessible within the query from the variable -` + "`$metadata`" + `. +Message metadata is also accessible within the query from the variable `+"`$metadata`"+`. -This processor uses the [gojq library][gojq], and therefore does not require -jq to be installed as a dependency. However, this also means there are some -differences in how these queries are executed versus the jq cli which you can -[read about here][gojq-difference]. +This processor uses the [gojq library][gojq], and therefore does not require jq to be installed as a dependency. However, this also means there are some differences in how these queries are executed versus the jq cli which you can [read about here][gojq-difference]. -If the query does not emit any value then the message is filtered, if the query -returns multiple values then the resulting message will be an array containing -all values. +If the query does not emit any value then the message is filtered, if the query returns multiple values then the resulting message will be an array containing all values. The full query syntax is described in [jq's documentation][jq-docs]. ## Error Handling -Queries can fail, in which case the message remains unchanged, errors are -logged, and the message is flagged as having failed, allowing you to use -[standard processor error handling patterns](/docs/configuration/error_handling).`, - Footnotes: ` +Queries can fail, in which case the message remains unchanged, errors are logged, and the message is flagged as having failed, allowing you to use [standard processor error handling patterns](/docs/configuration/error_handling).`). 
+ Footnotes(` [gojq]: https://github.com/itchyny/gojq [gojq-difference]: https://github.com/itchyny/gojq#difference-to-jq -[jq-docs]: https://stedolan.github.io/jq/manual/`, - Examples: []docs.AnnotatedExample{ - { - Title: "Mapping", - Summary: ` +[jq-docs]: https://stedolan.github.io/jq/manual/`). + Example("Mapping", ` When receiving JSON documents of the form: -` + "```json" + ` +`+"```json"+` { "locations": [ {"name": "Seattle", "state": "WA"}, @@ -77,29 +61,61 @@ When receiving JSON documents of the form: {"name": "Olympia", "state": "WA"} ] } -` + "```" + ` +`+"```"+` -We could collapse the location names from the state of Washington into a field ` + "`Cities`" + `: +We could collapse the location names from the state of Washington into a field `+"`Cities`"+`: -` + "```json" + ` +`+"```json"+` {"Cities": "Bellevue, Olympia, Seattle"} -` + "```" + ` +`+"```"+` With the following config:`, - Config: ` + ` pipeline: processors: - jq: query: '{Cities: .locations | map(select(.state == "WA").name) | sort | join(", ") }' `, - }, - }, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("query", "The jq query to filter and transform messages with."), - docs.FieldBool("raw", "Whether to process the input as a raw string instead of as JSON.").Advanced(), - docs.FieldBool("output_raw", "Whether to output raw text (unquoted) instead of JSON strings when the emitted values are string types.").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewJQConfig()), - }) + ). + Fields( + service.NewStringField(jqpFieldQuery). + Description("The jq query to filter and transform messages with."), + service.NewBoolField(jqpFieldRaw). + Description("Whether to process the input as a raw string instead of as JSON."). + Advanced(). + Default(false), + service.NewBoolField(jqpFieldOutputRaw). + Description("Whether to output raw text (unquoted) instead of JSON strings when the emitted values are string types."). + Advanced(). 
+ Default(false), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "jq", jqProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + query, err := conf.FieldString(jqpFieldQuery) + if err != nil { + return nil, err + } + raw, err := conf.FieldBool(jqpFieldRaw) + if err != nil { + return nil, err + } + outputRaw, err := conf.FieldBool(jqpFieldOutputRaw) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + + p, err := newJQ(query, raw, outputRaw, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("jq", p, mgr)), nil + }) if err != nil { panic(err) } @@ -116,14 +132,14 @@ type jqProc struct { code *gojq.Code } -func newJQ(conf processor.JQConfig, mgr bundle.NewManagement) (*jqProc, error) { +func newJQ(queryStr string, raw, outputRaw bool, mgr bundle.NewManagement) (*jqProc, error) { j := &jqProc{ - inRaw: conf.Raw, - outRaw: conf.OutputRaw, + inRaw: raw, + outRaw: outputRaw, log: mgr.Logger(), } - query, err := gojq.Parse(conf.Query) + query, err := gojq.Parse(queryStr) if err != nil { return nil, fmt.Errorf("error parsing jq query: %w", err) } diff --git a/internal/impl/pure/processor_jq_test.go b/internal/impl/pure/processor_jq_test.go index db1167d608..28547b9be8 100644 --- a/internal/impl/pure/processor_jq_test.go +++ b/internal/impl/pure/processor_jq_test.go @@ -16,9 +16,11 @@ import ( ) func TestJQAllParts(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jq" - conf.JQ.Query = ".foo.bar" + conf, err := processor.FromYAML(` +jq: + query: .foo.bar +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -37,9 +39,11 @@ func TestJQAllParts(t *testing.T) { } func TestJQValidation(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jq" - conf.JQ.Query = ".foo.bar" + conf, err := processor.FromYAML(` +jq: + query: .foo.bar 
+`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -54,9 +58,11 @@ func TestJQValidation(t *testing.T) { } func TestJQMutation(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jq" - conf.JQ.Query = `{foo: .foo} | .foo.bar = "baz"` + conf, err := processor.FromYAML(` +jq: + query: '{foo: .foo} | .foo.bar = "baz"' +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -159,9 +165,11 @@ func TestJQ(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jq" - conf.JQ.Query = test.path + conf, err := processor.FromYAML(` +jq: + query: '` + test.path + `' +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -260,10 +268,12 @@ func TestJQ_OutputRaw(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "jq" - conf.JQ.Query = test.path - conf.JQ.OutputRaw = true + conf, err := processor.FromYAML(` +jq: + query: '` + test.path + `' + output_raw: true +`) + require.NoError(t, err) jSet, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) diff --git a/internal/impl/pure/processor_jsonschema.go b/internal/impl/pure/processor_jsonschema.go index bd5364d481..a811bf9ea5 100644 --- a/internal/impl/pure/processor_jsonschema.go +++ b/internal/impl/pure/processor_jsonschema.go @@ -4,42 +4,36 @@ import ( "context" "errors" "fmt" + "net/http" "strings" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" jsonschema 
"github.com/xeipuuv/gojsonschema" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newJSONSchema(conf.JSONSchema, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("json_schema", p, mgr), nil - }, docs.ComponentSpec{ - Name: "json_schema", - Categories: []string{ - "Mapping", - }, - Summary: ` -Checks messages against a provided JSONSchema definition but does not change the -payload under any circumstances. If a message does not match the schema it can -be caught using error handling methods outlined [here](/docs/configuration/error_handling).`, - Description: ` -Please refer to the [JSON Schema website](https://json-schema.org/) for -information and tutorials regarding the syntax of the schema.`, - Footnotes: ` +const ( + jschemaPFieldSchemaPath = "schema_path" + jschemaPFieldSchema = "schema" +) + +func jschemaProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Mapping"). + Stable(). + Summary(`Checks messages against a provided JSONSchema definition but does not change the payload under any circumstances. If a message does not match the schema it can be caught using error handling methods outlined [here](/docs/configuration/error_handling).`). + Description(`Please refer to the [JSON Schema website](https://json-schema.org/) for information and tutorials regarding the syntax of the schema.`). 
+ Footnotes(` ## Examples With the following JSONSchema document: -` + "```json" + ` +`+"```json"+` { "$id": "https://example.com/person.schema.json", "$schema": "http://json-schema.org/draft-07/schema#", @@ -61,11 +55,11 @@ With the following JSONSchema document: } } } -` + "```" + ` +`+"```"+` And the following Benthos configuration: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - json_schema: @@ -75,21 +69,39 @@ pipeline: level: ERROR message: "Schema validation failed due to: ${!error()}" - mapping: 'root = deleted()' # Drop messages that fail -` + "```" + ` +`+"```"+` If a payload being processed looked like: -` + "```json" + ` +`+"```json"+` {"firstName":"John","lastName":"Doe","age":-21} -` + "```" + ` +`+"```"+` Then a log message would appear explaining the fault and the payload would be -dropped.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("schema", "A schema to apply. Use either this or the `schema_path` field."), - docs.FieldString("schema_path", "The path of a schema document to apply. Use either this or the `schema` field."), - ).ChildDefaultAndTypesFromStruct(processor.NewJSONSchemaConfig()), - }) +dropped.`). + Fields( + service.NewStringField(jschemaPFieldSchema). + Description("A schema to apply. Use either this or the `schema_path` field."). + Optional(), + service.NewStringField(jschemaPFieldSchemaPath). + Description("The path of a schema document to apply. Use either this or the `schema` field."). 
+ Optional(), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "json_schema", jschemaProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + schemaStr, _ := conf.FieldString(jschemaPFieldSchema) + schemaPath, _ := conf.FieldString(jschemaPFieldSchemaPath) + mgr := interop.UnwrapManagement(res) + p, err := newJSONSchema(schemaStr, schemaPath, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("json_schema", p, mgr)), nil + }) if err != nil { panic(err) } @@ -100,22 +112,22 @@ type jsonSchemaProc struct { schema *jsonschema.Schema } -func newJSONSchema(conf processor.JSONSchemaConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { +func newJSONSchema(schemaStr, schemaPath string, mgr bundle.NewManagement) (processor.AutoObserved, error) { var schema *jsonschema.Schema var err error // load JSONSchema definition - if schemaPath := conf.SchemaPath; schemaPath != "" { + if schemaPath := schemaPath; schemaPath != "" { if !(strings.HasPrefix(schemaPath, "file://") || strings.HasPrefix(schemaPath, "http://")) { return nil, fmt.Errorf("invalid schema_path provided, must start with file:// or http://") } - schema, err = jsonschema.NewSchema(jsonschema.NewReferenceLoader(conf.SchemaPath)) + schema, err = jsonschema.NewSchema(jsonschema.NewReferenceLoaderFileSystem(schemaPath, http.FS(mgr.FS()))) if err != nil { return nil, fmt.Errorf("failed to load JSON schema definition: %v", err) } - } else if conf.Schema != "" { - schema, err = jsonschema.NewSchema(jsonschema.NewStringLoader(conf.Schema)) + } else if schemaStr != "" { + schema, err = jsonschema.NewSchema(jsonschema.NewStringLoader(schemaStr)) if err != nil { return nil, fmt.Errorf("failed to load JSON schema definition: %v", err) } diff --git a/internal/impl/pure/processor_jsonschema_test.go b/internal/impl/pure/processor_jsonschema_test.go index 
b25cb7d120..2e3fd15342 100644 --- a/internal/impl/pure/processor_jsonschema_test.go +++ b/internal/impl/pure/processor_jsonschema_test.go @@ -4,8 +4,12 @@ import ( "context" "fmt" "os" + "path/filepath" + "strings" "testing" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" "github.com/benthosdev/benthos/v4/internal/message" @@ -13,37 +17,31 @@ import ( func TestJSONSchemaExternalSchemaCheck(t *testing.T) { schema := `{ - "$id": "https://example.com/person.schema.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Person", - "type": "object", - "properties": { - "firstName": { - "type": "string", - "description": "The person's first name." - }, - "lastName": { - "type": "string", - "description": "The person's last name." - }, - "age": { - "description": "Age in years which must be equal to or greater than zero.", - "type": "integer", - "minimum": 0 - } - } - }` + "$id": "https://example.com/person.schema.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Person", + "type": "object", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name." + }, + "lastName": { + "type": "string", + "description": "The person's last name." 
+ }, + "age": { + "description": "Age in years which must be equal to or greater than zero.", + "type": "integer", + "minimum": 0 + } + } +}` - tmpSchemaFile, err := os.CreateTemp("", "benthos_jsonschema_test") - if err != nil { - t.Fatal(err) - } - defer os.Remove(tmpSchemaFile.Name()) + tmpDir := t.TempDir() - // write schema definition to tmpfile - if _, err := tmpSchemaFile.WriteString(schema); err != nil { - t.Fatal(err) - } + sFileName := filepath.Join(tmpDir, "foo") + require.NoError(t, os.WriteFile(sFileName, []byte(schema), 0o777)) type fields struct { schemaPath string @@ -58,7 +56,7 @@ func TestJSONSchemaExternalSchemaCheck(t *testing.T) { { name: "schema match", fields: fields{ - schemaPath: fmt.Sprintf("file://%s", tmpSchemaFile.Name()), + schemaPath: fmt.Sprintf("file:///%s", sFileName), }, arg: [][]byte{ []byte(`{"firstName":"John","lastName":"Doe","age":21}`), @@ -68,7 +66,7 @@ func TestJSONSchemaExternalSchemaCheck(t *testing.T) { { name: "schema no match", fields: fields{ - schemaPath: fmt.Sprintf("file://%s", tmpSchemaFile.Name()), + schemaPath: fmt.Sprintf("file:///%s", sFileName), }, arg: [][]byte{ []byte(`{"firstName":"John","lastName":"Doe","age":-20}`), @@ -79,9 +77,11 @@ func TestJSONSchemaExternalSchemaCheck(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "json_schema" - conf.JSONSchema.SchemaPath = tt.fields.schemaPath + conf, err := processor.FromYAML(fmt.Sprintf(` +json_schema: + schema_path: '%v' +`, tt.fields.schemaPath)) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -110,26 +110,26 @@ func TestJSONSchemaExternalSchemaCheck(t *testing.T) { func TestJSONSchemaInlineSchemaCheck(t *testing.T) { schemaDef := `{ - "$id": "https://example.com/person.schema.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Person", - "type": "object", - "properties": { - "firstName": { - "type": "string", - 
"description": "The person's first name." - }, - "lastName": { - "type": "string", - "description": "The person's last name." - }, - "age": { - "description": "Age in years which must be equal to or greater than zero.", - "type": "integer", - "minimum": 0 - } - } - }` + "$id": "https://example.com/person.schema.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Person", + "type": "object", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name." + }, + "lastName": { + "type": "string", + "description": "The person's last name." + }, + "age": { + "description": "Age in years which must be equal to or greater than zero.", + "type": "integer", + "minimum": 0 + } + } +}` type fields struct { schema string @@ -168,9 +168,12 @@ func TestJSONSchemaInlineSchemaCheck(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "json_schema" - conf.JSONSchema.Schema = tt.fields.schema + conf, err := processor.FromYAML(fmt.Sprintf(` +json_schema: + schema: | + %v +`, strings.ReplaceAll(tt.fields.schema, "\n", " "))) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -199,39 +202,39 @@ func TestJSONSchemaInlineSchemaCheck(t *testing.T) { func TestJSONSchemaLowercaseDescriptionCheck(t *testing.T) { schema := `{ - "$id": "https://example.com/person.schema.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Person", - "type": "object", - "properties": { - "firstName": { - "type": "string", - "description": "The person's first name." 
- }, - "addresses": { - "description": "The person's addresses.'", - "type": "array", - "items": { - "type": "object", - "properties": { - "cityName": { - "description": "The city's name'", - "type": "string", - "maxLength": 50 - }, - "postCode": { - "description": "The city's postal code'", - "type": "string", - "maxLength": 50 - } - }, - "required": [ - "cityName" - ] - } - } + "$id": "https://example.com/person.schema.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Person", + "type": "object", + "properties": { + "firstName": { + "type": "string", + "description": "The person's first name." + }, + "addresses": { + "description": "The person's addresses.'", + "type": "array", + "items": { + "type": "object", + "properties": { + "cityName": { + "description": "The city's name'", + "type": "string", + "maxLength": 50 + }, + "postCode": { + "description": "The city's postal code'", + "type": "string", + "maxLength": 50 + } + }, + "required": [ + "cityName" + ] } - }` + } + } +}` type fields struct { schema string @@ -270,9 +273,12 @@ func TestJSONSchemaLowercaseDescriptionCheck(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "json_schema" - conf.JSONSchema.Schema = tt.fields.schema + conf, err := processor.FromYAML(fmt.Sprintf(` +json_schema: + schema: | + %v +`, strings.ReplaceAll(tt.fields.schema, "\n", " "))) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -300,11 +306,13 @@ func TestJSONSchemaLowercaseDescriptionCheck(t *testing.T) { } func TestJSONSchemaPathNotExist(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "json_schema" - conf.JSONSchema.SchemaPath = "file://path_does_not_exist" + conf, err := processor.FromYAML(` +json_schema: + schema_path: file://path_does_not_exist +`) + require.NoError(t, err) - _, err := mock.NewManager().NewProcessor(conf) + _, err = mock.NewManager().NewProcessor(conf) 
if err == nil { t.Error("expected error from loading non existent schema file") } @@ -327,9 +335,11 @@ func TestJSONSchemaInvalidSchema(t *testing.T) { t.Fatal(err) } - conf := processor.NewConfig() - conf.Type = "json_schema" - conf.JSONSchema.SchemaPath = fmt.Sprintf("file://%s", tmpSchemaFile.Name()) + conf, err := processor.FromYAML(` +json_schema: + schema_path: ` + fmt.Sprintf("file://%s", tmpSchemaFile.Name()) + ` +`) + require.NoError(t, err) _, err = mock.NewManager().NewProcessor(conf) if err == nil { diff --git a/internal/impl/pure/processor_log.go b/internal/impl/pure/processor_log.go index 4e6dc76fdd..badc64a379 100644 --- a/internal/impl/pure/processor_log.go +++ b/internal/impl/pure/processor_log.go @@ -9,33 +9,33 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/field" "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newLogProcessor(conf, mgr, mgr.Logger()) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("log", p, mgr), nil - }, docs.ComponentSpec{ - Name: "log", - Categories: []string{ - "Utility", - }, - Summary: `Prints a log event for each message. Messages always remain unchanged. 
The log message can be set using function interpolations described [here](/docs/configuration/interpolation#bloblang-queries) which allows you to log the contents and metadata of messages.`, - Description: ` -The ` + "`level`" + ` field determines the log level of the printed events and can be any of the following values: TRACE, DEBUG, INFO, WARN, ERROR. +const ( + logPFieldLevel = "level" + logPFieldFields = "fields" + logPFieldFieldsMapping = "fields_mapping" + logPFieldMessage = "message" +) + +func logProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary(`Prints a log event for each message. Messages always remain unchanged. The log message can be set using function interpolations described [here](/docs/configuration/interpolation#bloblang-queries) which allows you to log the contents and metadata of messages.`). + Description(` +The `+"`level`"+` field determines the log level of the printed events and can be any of the following values: TRACE, DEBUG, INFO, WARN, ERROR. 
### Structured Fields -It's also possible add custom fields to logs when the format is set to a structured form such as ` + "`json` or `logfmt`" + ` with the config field ` + "[`fields_mapping`](#fields_mapping)" + `: +It's also possible add custom fields to logs when the format is set to a structured form such as `+"`json` or `logfmt`"+` with the config field `+"[`fields_mapping`](#fields_mapping)"+`: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - log: @@ -46,21 +46,60 @@ pipeline: root.id = this.id root.age = this.user.age root.kafka_topic = meta("kafka_topic") -` + "```" + ` -`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("level", "The log level to use.").HasOptions("FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE", "ALL").LinterFunc(nil), - docs.FieldString("fields", "A map of fields to print along with the log message.").IsInterpolated().Map().Deprecated(), - docs.FieldString( - "fields_mapping", "An optional [Bloblang mapping](/docs/guides/bloblang/about) that can be used to specify extra fields to add to the log. If log fields are also added with the `fields` field then those values will override matching keys from this mapping.", - `root.reason = "cus I wana" +`+"```"+` +`). + Fields( + service.NewStringEnumField(logPFieldLevel, "FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE", "ALL"). + Description("The log level to use."). + LintRule(``). + Default("INFO"), + service.NewBloblangField(logPFieldFieldsMapping). + Description("An optional [Bloblang mapping](/docs/guides/bloblang/about) that can be used to specify extra fields to add to the log. If log fields are also added with the `fields` field then those values will override matching keys from this mapping."). 
+ Examples( + `root.reason = "cus I wana" root.id = this.id root.age = this.user.age.number() root.kafka_topic = meta("kafka_topic")`, - ).AtVersion("3.40.0").IsBloblang(), - docs.FieldString("message", "The message to print.").IsInterpolated(), - ).ChildDefaultAndTypesFromStruct(processor.NewLogConfig()), - }) + ). + Optional(), + service.NewInterpolatedStringField(logPFieldMessage). + Description("The message to print."). + Default(""), + service.NewInterpolatedStringMapField(logPFieldFields). + Description("A map of fields to print along with the log message."). + Optional(). + Deprecated(), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "log", logProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + logLevel, err := conf.FieldString(logPFieldLevel) + if err != nil { + return nil, err + } + + messageStr, err := conf.FieldString(logPFieldMessage) + if err != nil { + return nil, err + } + + depFields, _ := conf.FieldStringMap(logPFieldFields) + if err != nil { + return nil, err + } + + fieldsMappingStr, _ := conf.FieldString(logPFieldFieldsMapping) + + mgr := interop.UnwrapManagement(res) + p, err := newLogProcessor(messageStr, logLevel, fieldsMappingStr, depFields, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("log", p, mgr)), nil + }) if err != nil { panic(err) } @@ -75,26 +114,26 @@ type logProcessor struct { fieldsMapping *mapping.Executor } -func newLogProcessor(conf processor.Config, mgr bundle.NewManagement, logger log.Modular) (processor.AutoObservedBatched, error) { - message, err := mgr.BloblEnvironment().NewField(conf.Log.Message) +func newLogProcessor(messageStr, levelStr, fieldsMappingStr string, depFields map[string]string, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { + message, err := mgr.BloblEnvironment().NewField(messageStr) if err != nil { return nil, 
fmt.Errorf("failed to parse message expression: %v", err) } l := &logProcessor{ - logger: logger, - level: conf.Log.Level, + logger: mgr.Logger(), + level: levelStr, fields: map[string]*field.Expression{}, message: message, } - if len(conf.Log.Fields) > 0 { - for k, v := range conf.Log.Fields { + if len(depFields) > 0 { + for k, v := range depFields { if l.fields[k], err = mgr.BloblEnvironment().NewField(v); err != nil { return nil, fmt.Errorf("failed to parse field '%v' expression: %v", k, err) } } } - if len(conf.Log.FieldsMapping) > 0 { - if l.fieldsMapping, err = mgr.BloblEnvironment().NewMapping(conf.Log.FieldsMapping); err != nil { + if len(fieldsMappingStr) > 0 { + if l.fieldsMapping, err = mgr.BloblEnvironment().NewMapping(fieldsMappingStr); err != nil { return nil, fmt.Errorf("failed to parse fields mapping: %w", err) } } diff --git a/internal/impl/pure/processor_log_test.go b/internal/impl/pure/processor_log_test.go index 079ebf98c0..386f5001dc 100644 --- a/internal/impl/pure/processor_log_test.go +++ b/internal/impl/pure/processor_log_test.go @@ -79,9 +79,11 @@ func (m *mockLog) Traceln(message string) { } func TestLogBadLevel(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "log" - conf.Log.Level = "does not exist" + conf, err := processor.FromYAML(` +log: + level: does not exist +`) + require.NoError(t, err) if _, err := mock.NewManager().NewProcessor(conf); err == nil { t.Error("expected err from bad log level") @@ -89,15 +91,16 @@ func TestLogBadLevel(t *testing.T) { } func TestLogLevelTrace(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "log" - conf.Log.Message = "${!json(\"foo\")}" - logMock := &mockLog{} levels := []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR"} for _, level := range levels { - conf.Log.Level = level + conf, err := processor.FromYAML(` +log: + message: '${!json("foo")}' + level: ` + level + ` +`) + require.NoError(t, err) mgr := mock.NewManager() mgr.L = logMock @@ -136,18 +139,18 @@ func 
TestLogLevelTrace(t *testing.T) { } func TestLogWithFields(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "log" - conf.Log.Message = "${!json(\"foo\")}" - conf.Log.Fields = map[string]string{ - "static": "foo", - "dynamic": "${!json(\"bar\")}", - } + conf, err := processor.FromYAML(` +log: + message: '${!json("foo")}' + level: INFO + fields: + static: foo + dynamic: '${!json("bar")}' +`) + require.NoError(t, err) logMock := &mockLog{} - conf.Log.Level = "INFO" - mgr := mock.NewManager() mgr.L = logMock @@ -203,18 +206,19 @@ func TestLogWithFields(t *testing.T) { } func TestLogWithFieldsMapping(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "log" - conf.Log.Message = "hello world" - conf.Log.FieldsMapping = ` -root.static = "static value" -root.age = this.age + 2 -root.is_cool = this.is_cool` + conf, err := processor.FromYAML(` +log: + message: 'hello world' + level: INFO + fields_mapping: | + root.static = "static value" + root.age = this.age + 2 + root.is_cool = this.is_cool +`) + require.NoError(t, err) logMock := &mockLog{} - conf.Log.Level = "INFO" - mgr := mock.NewManager() mgr.L = logMock diff --git a/internal/impl/pure/processor_metric.go b/internal/impl/pure/processor_metric.go index a45c2881c5..b48b358e7a 100644 --- a/internal/impl/pure/processor_metric.go +++ b/internal/impl/pure/processor_metric.go @@ -10,48 +10,79 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/field" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/metrics" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) 
(processor.V1, error) { - return newMetricProcessor(conf, mgr, mgr.Logger(), mgr.Metrics()) - }, docs.ComponentSpec{ - Name: "metric", - Categories: []string{ - "Utility", - }, - Summary: "Emit custom metrics by extracting values from messages.", - Description: ` -This processor works by evaluating an [interpolated field ` + "`value`" + `](/docs/configuration/interpolation#bloblang-queries) for each message and updating a emitted metric according to the [type](#types). - -Custom metrics such as these are emitted along with Benthos internal metrics, where you can customize where metrics are sent, which metric names are emitted and rename them as/when appropriate. For more information check out the [metrics docs here](/docs/components/metrics/about).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("type", "The metric [type](#types) to create.").HasOptions( - "counter", - "counter_by", - "gauge", - "timing", - ), - docs.FieldString("name", "The name of the metric to create, this must be unique across all Benthos components otherwise it will overwrite those other metrics."), - docs.FieldString( - "labels", "A map of label names and values that can be used to enrich metrics. Labels are not supported by some metric destinations, in which case the metrics series are combined.", - map[string]string{ - "type": "${! json(\"doc.type\") }", - "topic": "${! meta(\"kafka_topic\") }", - }, - ).IsInterpolated().Map(), - docs.FieldString("value", "For some metric types specifies a value to set, increment. 
Certain metrics exporters such as Prometheus support floating point values, but those that do not will cast a floating point value into an integer.").IsInterpolated(), - ).ChildDefaultAndTypesFromStruct(processor.NewMetricConfig()), - Examples: []docs.AnnotatedExample{ - { - Title: "Counter", - Summary: "In this example we emit a counter metric called `Foos`, which increments for every message processed, and we label the metric with some metadata about where the message came from and a field from the document that states what type it is. We also configure our metrics to emit to CloudWatch, and explicitly only allow our custom metric and some internal Benthos metrics to emit.", - Config: ` +const ( + metProcFieldType = "type" + metProcFieldName = "name" + metProcFieldLabels = "labels" + metProcFieldValue = "value" +) + +func metProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary("Emit custom metrics by extracting values from messages."). + Description(` +This processor works by evaluating an [interpolated field `+"`value`"+`](/docs/configuration/interpolation#bloblang-queries) for each message and updating a emitted metric according to the [type](#types). + +Custom metrics such as these are emitted along with Benthos internal metrics, where you can customize where metrics are sent, which metric names are emitted and rename them as/when appropriate. For more information check out the [metrics docs here](/docs/components/metrics/about).`). + Footnotes(` +## Types + +### `+"`counter`"+` + +Increments a counter by exactly 1, the contents of `+"`value`"+` are ignored +by this type. + +### `+"`counter_by`"+` + +If the contents of `+"`value`"+` can be parsed as a positive integer value +then the counter is incremented by this value. 
+ +For example, the following configuration will increment the value of the +`+"`count.custom.field` metric by the contents of `field.some.value`"+`: + +`+"```yaml"+` +pipeline: + processors: + - metric: + type: counter_by + name: CountCustomField + value: ${!json("field.some.value")} +`+"```"+` + +### `+"`gauge`"+` + +If the contents of `+"`value`"+` can be parsed as a positive integer value +then the gauge is set to this value. + +For example, the following configuration will set the value of the +`+"`gauge.custom.field` metric to the contents of `field.some.value`"+`: + +`+"```yaml"+` +pipeline: + processors: + - metric: + type: gauge + name: GaugeCustomField + value: ${!json("field.some.value")} +`+"```"+` + +### `+"`timing`"+` + +Equivalent to `+"`gauge`"+` where instead the metric is a timing. It is recommended that timing values are recorded in nanoseconds in order to be consistent with standard Benthos timing metrics, as in some cases these values are automatically converted into other units such as when exporting timings as histograms with Prometheus metrics.`). + Example( + "Counter", + "In this example we emit a counter metric called `Foos`, which increments for every message processed, and we label the metric with some metadata about where the message came from and a field from the document that states what type it is. We also configure our metrics to emit to CloudWatch, and explicitly only allow our custom metric and some internal Benthos metrics to emit.", + ` pipeline: processors: - metric: @@ -72,11 +103,11 @@ metrics: aws_cloudwatch: namespace: ProdConsumer `, - }, - { - Title: "Gauge", - Summary: "In this example we emit a gauge metric called `FooSize`, which is given a value extracted from JSON messages at the path `foo.size`. We then also configure our Prometheus metric exporter to only emit this custom metric and nothing else. We also label the metric with some metadata.", - Config: ` + ). 
+ Example( + "Gauge", + "In this example we emit a gauge metric called `FooSize`, which is given a value extracted from JSON messages at the path `foo.size`. We then also configure our Prometheus metric exporter to only emit this custom metric and nothing else. We also label the metric with some metadata.", + ` pipeline: processors: - metric: @@ -90,63 +121,66 @@ metrics: mapping: 'if this != "FooSize" { deleted() }' prometheus: {} `, - }, - }, - Footnotes: ` -## Types - -### ` + "`counter`" + ` - -Increments a counter by exactly 1, the contents of ` + "`value`" + ` are ignored -by this type. - -### ` + "`counter_by`" + ` - -If the contents of ` + "`value`" + ` can be parsed as a positive integer value -then the counter is incremented by this value. - -For example, the following configuration will increment the value of the -` + "`count.custom.field` metric by the contents of `field.some.value`" + `: - -` + "```yaml" + ` -pipeline: - processors: - - metric: - type: counter_by - name: CountCustomField - value: ${!json("field.some.value")} -` + "```" + ` - -### ` + "`gauge`" + ` - -If the contents of ` + "`value`" + ` can be parsed as a positive integer value -then the gauge is set to this value. - -For example, the following configuration will set the value of the -` + "`gauge.custom.field` metric to the contents of `field.some.value`" + `: - -` + "```yaml" + ` -pipeline: - processors: - - metric: - type: gauge - name: GaugeCustomField - value: ${!json("field.some.value")} -` + "```" + ` - -### ` + "`timing`" + ` + ). + Fields( + service.NewStringEnumField(metProcFieldType, "counter", "counter_by", "gauge", "timing"). + Description("The metric [type](#types) to create."), + service.NewStringField(metProcFieldName). + Description("The name of the metric to create, this must be unique across all Benthos components otherwise it will overwrite those other metrics."), + service.NewInterpolatedStringMapField(metProcFieldLabels). 
+ Description("A map of label names and values that can be used to enrich metrics. Labels are not supported by some metric destinations, in which case the metrics series are combined."). + Example(map[string]any{ + "type": "${! json(\"doc.type\") }", + "topic": "${! meta(\"kafka_topic\") }", + }). + Optional(), + service.NewInterpolatedStringField(metProcFieldValue). + Description("For some metric types specifies a value to set, increment. Certain metrics exporters such as Prometheus support floating point values, but those that do not will cast a floating point value into an integer."). + Default(""), + ) +} -Equivalent to ` + "`gauge`" + ` where instead the metric is a timing. It is recommended that timing values are recorded in nanoseconds in order to be consistent with standard Benthos timing metrics, as in some cases these values are automatically converted into other units such as when exporting timings as histograms with Prometheus metrics.`, - }) +func init() { + err := service.RegisterBatchProcessor( + "metric", metProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + procTypeStr, err := conf.FieldString(metProcFieldType) + if err != nil { + return nil, err + } + + procName, err := conf.FieldString(metProcFieldName) + if err != nil { + return nil, err + } + + var labelMap map[string]string + if conf.Contains(metProcFieldLabels) { + if labelMap, err = conf.FieldStringMap(metProcFieldLabels); err != nil { + return nil, err + } + } + + valueStr, err := conf.FieldString(metProcFieldValue) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newMetricProcessor(procTypeStr, procName, valueStr, labelMap, mgr) + if err != nil { + return nil, err + } + + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } } type metricProcessor struct { - conf processor.Config - log log.Modular - stats metrics.Type + log log.Modular value 
*field.Expression labels labels @@ -194,32 +228,29 @@ func (l labels) values(index int, msg message.Batch) ([]string, error) { return values, nil } -func newMetricProcessor(conf processor.Config, mgr bundle.NewManagement, log log.Modular, stats metrics.Type) (processor.V1, error) { - value, err := mgr.BloblEnvironment().NewField(conf.Metric.Value) +func newMetricProcessor(typeStr, name, valueStr string, labels map[string]string, mgr bundle.NewManagement) (processor.V1, error) { + value, err := mgr.BloblEnvironment().NewField(valueStr) if err != nil { return nil, fmt.Errorf("failed to parse value expression: %v", err) } m := &metricProcessor{ - conf: conf, - log: log, - stats: stats, + log: mgr.Logger(), value: value, } - name := conf.Metric.Name if name == "" { return nil, errors.New("metric name must not be empty") } - labelNames := make([]string, 0, len(conf.Metric.Labels)) - for n := range conf.Metric.Labels { + labelNames := make([]string, 0, len(labels)) + for n := range labels { labelNames = append(labelNames, n) } sort.Strings(labelNames) for _, n := range labelNames { - v, err := mgr.BloblEnvironment().NewField(conf.Metric.Labels[n]) + v, err := mgr.BloblEnvironment().NewField(labels[n]) if err != nil { return nil, fmt.Errorf("failed to parse label '%v' expression: %v", n, err) } @@ -229,7 +260,8 @@ func newMetricProcessor(conf processor.Config, mgr bundle.NewManagement, log log }) } - switch strings.ToLower(conf.Metric.Type) { + stats := mgr.Metrics() + switch strings.ToLower(typeStr) { case "counter": if len(m.labels) > 0 { m.mCounterVec = stats.GetCounterVec(name, m.labels.names()...) 
@@ -259,7 +291,7 @@ func newMetricProcessor(conf processor.Config, mgr bundle.NewManagement, log log } m.handler = m.handleTimer default: - return nil, fmt.Errorf("metric type unrecognised: %v", conf.Metric.Type) + return nil, fmt.Errorf("metric type unrecognised: %v", typeStr) } return m, nil diff --git a/internal/impl/pure/processor_metric_test.go b/internal/impl/pure/processor_metric_test.go index 9109279f0c..5ea8587d8d 100644 --- a/internal/impl/pure/processor_metric_test.go +++ b/internal/impl/pure/processor_metric_test.go @@ -14,25 +14,33 @@ import ( ) func TestMetricBad(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "metric" - conf.Metric.Type = "bad type" - conf.Metric.Name = "some.path" - _, err := mock.NewManager().NewProcessor(conf) + conf, err := processor.FromYAML(` +metric: + type: bad type + name: some.path +`) + require.NoError(t, err) + + _, err = mock.NewManager().NewProcessor(conf) require.Error(t, err) - conf = processor.NewConfig() - conf.Type = "metric" + conf, err = processor.FromYAML(` +type: metric +`) + require.NoError(t, err) + _, err = mock.NewManager().NewProcessor(conf) require.Error(t, err) } func TestMetricCounter(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "metric" - conf.Metric.Type = "counter" - conf.Metric.Name = "foo.bar" - conf.Metric.Value = "${!json(\"foo.bar\")}" + conf, err := processor.FromYAML(` +metric: + type: counter + name: foo.bar + value: '${!json("foo.bar")}' +`) + require.NoError(t, err) mockMetrics := metrics.NewLocal() @@ -75,11 +83,13 @@ func TestMetricCounter(t *testing.T) { } func TestMetricCounterBy(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "metric" - conf.Metric.Type = "counter_by" - conf.Metric.Name = "foo.bar" - conf.Metric.Value = "${!json(\"foo.bar\")}" + conf, err := processor.FromYAML(` +metric: + type: counter_by + name: foo.bar + value: '${!json("foo.bar")}' +`) + require.NoError(t, err) mockMetrics := metrics.NewLocal() @@ -125,11 +135,13 @@ func 
TestMetricCounterBy(t *testing.T) { } func TestMetricGauge(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "metric" - conf.Metric.Type = "gauge" - conf.Metric.Name = "foo.bar" - conf.Metric.Value = "${!json(\"foo.bar\")}" + conf, err := processor.FromYAML(` +metric: + type: gauge + name: foo.bar + value: '${!json("foo.bar")}' +`) + require.NoError(t, err) mockMetrics := metrics.NewLocal() @@ -175,11 +187,13 @@ func TestMetricGauge(t *testing.T) { } func TestMetricTiming(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "metric" - conf.Metric.Type = "timing" - conf.Metric.Name = "foo.bar" - conf.Metric.Value = "${!json(\"foo.bar\")}" + conf, err := processor.FromYAML(` +metric: + type: timing + name: foo.bar + value: '${!json("foo.bar")}' +`) + require.NoError(t, err) mockMetrics := metrics.NewLocal() diff --git a/internal/impl/pure/processor_noop.go b/internal/impl/pure/processor_noop.go index 80606eaa2a..8314ebf7bd 100644 --- a/internal/impl/pure/processor_noop.go +++ b/internal/impl/pure/processor_noop.go @@ -3,20 +3,20 @@ package pure import ( "context" - "github.com/benthosdev/benthos/v4/internal/bundle" - "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(c processor.Config, nm bundle.NewManagement) (processor.V1, error) { - return &noopProcessor{}, nil - }, docs.ComponentSpec{ - Name: "noop", - Summary: "Noop is a processor that does nothing, the message passes through unchanged. Why? Sometimes doing nothing is the braver option.", - Config: docs.FieldObject("", "").HasDefault(map[string]any{}), - }) + err := service.RegisterBatchProcessor("noop", service.NewConfigSpec(). + Stable(). 
+ Summary("Noop is a processor that does nothing, the message passes through unchanged. Why? Sometimes doing nothing is the braver option."). + Field(service.NewObjectField("").Default(map[string]any{})), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + p := &noopProcessor{} + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } diff --git a/internal/impl/pure/processor_parallel.go b/internal/impl/pure/processor_parallel.go index f844eb07b5..e8b84736e6 100644 --- a/internal/impl/pure/processor_parallel.go +++ b/internal/impl/pure/processor_parallel.go @@ -2,38 +2,55 @@ package pure import ( "context" - "strconv" "sync" - "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + parProcFieldCap = "cap" + parProcFieldProcessors = "processors" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newParallel(conf.Parallel, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("parallel", p, mgr), nil - }, docs.ComponentSpec{ - Name: "parallel", - Categories: []string{ - "Composition", - }, - Summary: ` -A processor that applies a list of child processors to messages of a batch as though they were each a batch of one message (similar to the ` + "[`for_each`](/docs/components/processors/for_each)" + ` processor), but where each message is processed in parallel.`, - Description: ` -The field ` + "`cap`" + `, if greater than zero, caps the maximum number of parallel processing threads. 
- -The functionality of this processor depends on being applied across messages that are batched. You can find out more about batching [in this doc](/docs/configuration/batching).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldInt("cap", "The maximum number of messages to have processing at a given time."), - docs.FieldProcessor("processors", "A list of child processors to apply.").Array(), - ).ChildDefaultAndTypesFromStruct(processor.NewParallelConfig()), - }) + err := service.RegisterBatchProcessor( + "parallel", service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary(`A processor that applies a list of child processors to messages of a batch as though they were each a batch of one message (similar to the `+"[`for_each`](/docs/components/processors/for_each)"+` processor), but where each message is processed in parallel.`). + Description(` +The field `+"`cap`"+`, if greater than zero, caps the maximum number of parallel processing threads. + +The functionality of this processor depends on being applied across messages that are batched. You can find out more about batching [in this doc](/docs/configuration/batching).`). + Fields( + service.NewIntField(parProcFieldCap). + Description("The maximum number of messages to have processing at a given time."). + Default(0), + service.NewProcessorListField(parProcFieldProcessors). 
+ Description("A list of child processors to apply."), + ), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + var p parallelProc + var err error + + if p.cap, err = conf.FieldInt(parProcFieldCap); err != nil { + return nil, err + } + + var pChildren []*service.OwnedProcessor + if pChildren, err = conf.FieldProcessorList(parProcFieldProcessors); err != nil { + return nil, err + } + p.children = make([]processor.V1, len(pChildren)) + for i, c := range pChildren { + p.children[i] = interop.UnwrapOwnedProcessor(c) + } + + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("parallel", &p, interop.UnwrapManagement(mgr))), nil + }) if err != nil { panic(err) } @@ -44,22 +61,6 @@ type parallelProc struct { cap int } -func newParallel(conf processor.ParallelConfig, mgr bundle.NewManagement) (processor.AutoObservedBatched, error) { - var children []processor.V1 - for i, pconf := range conf.Processors { - pMgr := mgr.IntoPath("parallel", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, err - } - children = append(children, proc) - } - return ¶llelProc{ - children: children, - cap: conf.Cap, - }, nil -} - func (p *parallelProc) ProcessBatch(ctx *processor.BatchProcContext, msg message.Batch) ([]message.Batch, error) { resultMsgs := make([]message.Batch, msg.Len()) _ = msg.Iter(func(i int, p *message.Part) error { diff --git a/internal/impl/pure/processor_parse_log.go b/internal/impl/pure/processor_parse_log.go index cbd9d22d83..afa0e4a11c 100644 --- a/internal/impl/pure/processor_parse_log.go +++ b/internal/impl/pure/processor_parse_log.go @@ -11,83 +11,127 @@ import ( "github.com/influxdata/go-syslog/v3/rfc5424" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" 
"github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newParseLog(conf.ParseLog, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("parse_log", p, mgr), nil - }, docs.ComponentSpec{ - Name: "parse_log", - Categories: []string{ - "Parsing", - }, - Summary: ` -Parses common log [formats](#formats) into [structured data](#codecs). This is -easier and often much faster than ` + "[`grok`](/docs/components/processors/grok)" + `.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("format", "A common log [format](#formats) to parse.").HasOptions( - "syslog_rfc5424", "syslog_rfc3164", - ), - docs.FieldString("codec", "Specifies the structured format to parse a log into.").HasOptions( - "json", - ), - docs.FieldBool("best_effort", "Still returns partially parsed messages even if an error occurs.").Advanced(), - docs.FieldBool("allow_rfc3339", "Also accept timestamps in rfc3339 format while parsing."+ - " Applicable to format `syslog_rfc3164`.").Advanced(), - docs.FieldString("default_year", "Sets the strategy used to set the year for rfc3164 timestamps."+ - " Applicable to format `syslog_rfc3164`. When set to `current` the current year will be set, when"+ - " set to an integer that value will be used. Leave this field empty to not set a default year at all.").Advanced(), - docs.FieldString("default_timezone", "Sets the strategy to decide the timezone for rfc3164 timestamps."+ - " Applicable to format `syslog_rfc3164`. 
This value should follow the [time.LoadLocation](https://golang.org/pkg/time/#LoadLocation) format.").Advanced(), - ).ChildDefaultAndTypesFromStruct(processor.NewParseLogConfig()), - Footnotes: ` +const ( + plpFieldFormat = "format" + plpFieldCodec = "codec" + plpFieldBestEffort = "best_effort" + plpFieldWithRFC3339 = "allow_rfc3339" + plpFieldWithYear = "default_year" + plpFieldWithTimezone = "default_timezone" +) + +func parseLogSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Parsing"). + Stable(). + Summary(`Parses common log [formats](#formats) into [structured data](#codecs). This is easier and often much faster than `+"[`grok`](/docs/components/processors/grok)"+`.`). + Footnotes(` ## Codecs -Currently the only supported structured data codec is ` + "`json`" + `. +Currently the only supported structured data codec is `+"`json`"+`. ## Formats -### ` + "`syslog_rfc5424`" + ` - -Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424) -spec. The resulting structured document may contain any of the following fields: - -- ` + "`message`" + ` (string) -- ` + "`timestamp`" + ` (string, RFC3339) -- ` + "`facility`" + ` (int) -- ` + "`severity`" + ` (int) -- ` + "`priority`" + ` (int) -- ` + "`version`" + ` (int) -- ` + "`hostname`" + ` (string) -- ` + "`procid`" + ` (string) -- ` + "`appname`" + ` (string) -- ` + "`msgid`" + ` (string) -- ` + "`structureddata`" + ` (object) - -### ` + "`syslog_rfc3164`" + ` - -Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164) -spec. 
The resulting structured document may contain any of the following fields: - -- ` + "`message`" + ` (string) -- ` + "`timestamp`" + ` (string, RFC3339) -- ` + "`facility`" + ` (int) -- ` + "`severity`" + ` (int) -- ` + "`priority`" + ` (int) -- ` + "`hostname`" + ` (string) -- ` + "`procid`" + ` (string) -- ` + "`appname`" + ` (string) -- ` + "`msgid`" + ` (string) -`, - }) +### `+"`syslog_rfc5424`"+` + +Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424) spec. The resulting structured document may contain any of the following fields: + +- `+"`message`"+` (string) +- `+"`timestamp`"+` (string, RFC3339) +- `+"`facility`"+` (int) +- `+"`severity`"+` (int) +- `+"`priority`"+` (int) +- `+"`version`"+` (int) +- `+"`hostname`"+` (string) +- `+"`procid`"+` (string) +- `+"`appname`"+` (string) +- `+"`msgid`"+` (string) +- `+"`structureddata`"+` (object) + +### `+"`syslog_rfc3164`"+` + +Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164) spec. The resulting structured document may contain any of the following fields: + +- `+"`message`"+` (string) +- `+"`timestamp`"+` (string, RFC3339) +- `+"`facility`"+` (int) +- `+"`severity`"+` (int) +- `+"`priority`"+` (int) +- `+"`hostname`"+` (string) +- `+"`procid`"+` (string) +- `+"`appname`"+` (string) +- `+"`msgid`"+` (string) +`). + Fields( + service.NewStringEnumField(plpFieldFormat, "syslog_rfc5424", "syslog_rfc3164"). + Description("A common log [format](#formats) to parse."), + service.NewBoolField(plpFieldBestEffort). + Description("Still returns partially parsed messages even if an error occurs."). + Advanced(). + Default(true), + service.NewBoolField(plpFieldWithRFC3339). + Description("Also accept timestamps in rfc3339 format while parsing. Applicable to format `syslog_rfc3164`."). + Advanced(). + Default(true), + service.NewStringField(plpFieldWithYear). + Description("Sets the strategy used to set the year for rfc3164 timestamps. 
Applicable to format `syslog_rfc3164`. When set to `current` the current year will be set, when set to an integer that value will be used. Leave this field empty to not set a default year at all."). + Advanced(). + Default("current"), + service.NewStringField(plpFieldWithTimezone). + Description("Sets the strategy to decide the timezone for rfc3164 timestamps. Applicable to format `syslog_rfc3164`. This value should follow the [time.LoadLocation](https://golang.org/pkg/time/#LoadLocation) format."). + Advanced(). + Default("UTC"), + service.NewStringField(plpFieldCodec).Deprecated(), + ) +} + +type parseLogConfig struct { + Format string + Codec string + BestEffort bool + WithRFC3339 bool + WithYear string + WithTimezone string +} + +func init() { + err := service.RegisterBatchProcessor( + + "parse_log", parseLogSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + var c parseLogConfig + var err error + + if c.Format, err = conf.FieldString(plpFieldFormat); err != nil { + return nil, err + } + if c.BestEffort, err = conf.FieldBool(plpFieldBestEffort); err != nil { + return nil, err + } + if c.WithRFC3339, err = conf.FieldBool(plpFieldWithRFC3339); err != nil { + return nil, err + } + if c.WithYear, err = conf.FieldString(plpFieldWithYear); err != nil { + return nil, err + } + if c.WithTimezone, err = conf.FieldString(plpFieldWithTimezone); err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newParseLog(c, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("parse_log", p, mgr)), nil + }) if err != nil { panic(err) } @@ -244,7 +288,7 @@ type parseLogProc struct { log log.Modular } -func newParseLog(conf processor.ParseLogConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { +func newParseLog(conf parseLogConfig, mgr bundle.NewManagement) (processor.AutoObserved, error) { s := &parseLogProc{ 
formatStr: conf.Format, log: mgr.Logger(), diff --git a/internal/impl/pure/processor_parse_log_test.go b/internal/impl/pure/processor_parse_log_test.go index 39cd121b73..f69736c98e 100644 --- a/internal/impl/pure/processor_parse_log_test.go +++ b/internal/impl/pure/processor_parse_log_test.go @@ -6,6 +6,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" "github.com/benthosdev/benthos/v4/internal/message" @@ -19,14 +21,12 @@ func TestParseLogCases(t *testing.T) { input string output string format string - codec string bestEff bool } tests := []testCase{ { name: "valid syslog_rfc5424 input, valid json output", format: "syslog_rfc5424", - codec: "json", bestEff: true, input: `<42>4 2049-10-11T22:14:15.003Z toaster.smarthome myapp - 2 [home01 device_id="43"] failed to make a toast.`, output: `{"appname":"myapp","facility":5,"hostname":"toaster.smarthome","message":"failed to make a toast.","msgid":"2","priority":42,"severity":2,"structureddata":{"home01":{"device_id":"43"}},"timestamp":"2049-10-11T22:14:15.003Z","version":4}`, @@ -34,7 +34,6 @@ func TestParseLogCases(t *testing.T) { { name: "invalid syslog_rfc5424 input, invalid json output", format: "syslog_rfc5424", - codec: "json", bestEff: true, input: `not a syslog at all.`, output: `not a syslog at all.`, @@ -42,7 +41,6 @@ func TestParseLogCases(t *testing.T) { { name: "valid syslog_rfc3164 input, valid json output", format: "syslog_rfc3164", - codec: "json", bestEff: true, input: `<28>Dec 2 16:49:23 host app[23410]: Test`, output: fmt.Sprintf(`{"appname":"app","facility":3,"hostname":"host","message":"Test","priority":28,"procid":"23410","severity":4,"timestamp":"%v-12-02T16:49:23Z"}`, time.Now().Year()), @@ -50,11 +48,13 @@ func TestParseLogCases(t *testing.T) { } for _, test := range tests { - conf := processor.NewConfig() - conf.Type = "parse_log" - 
conf.ParseLog.Format = test.format - conf.ParseLog.Codec = test.codec - conf.ParseLog.BestEffort = test.bestEff + conf, err := processor.FromYAML(fmt.Sprintf(` +parse_log: + format: %v + best_effort: %v +`, test.format, test.bestEff)) + require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Fatal(err) @@ -93,10 +93,13 @@ func TestParseLogRFC5424(t *testing.T) { }, } - conf := processor.NewConfig() - conf.Type = "parse_log" - conf.ParseLog.Format = "syslog_rfc5424" - conf.ParseLog.BestEffort = true + conf, err := processor.FromYAML(` +parse_log: + format: syslog_rfc5424 + best_effort: true +`) + require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Fatal(err) diff --git a/internal/impl/pure/processor_rate_limit.go b/internal/impl/pure/processor_rate_limit.go index 2c88fafdaf..23933763d4 100644 --- a/internal/impl/pure/processor_rate_limit.go +++ b/internal/impl/pure/processor_rate_limit.go @@ -8,33 +8,42 @@ import ( "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/component/ratelimit" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) +const ( + rlimitFieldResource = "resource" +) + +func rlimitProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary(`Throttles the throughput of a pipeline according to a specified ` + "[`rate_limit`](/docs/components/rate_limits/about)" + ` resource. Rate limits are shared across components and therefore apply globally to all processing pipelines.`). + Field(service.NewStringField(rlimitFieldResource). 
+ Description("The target [`rate_limit` resource](/docs/components/rate_limits/about).")) +} + func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newRateLimitProc(conf.RateLimit, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("rate_limit", p, mgr), nil - }, docs.ComponentSpec{ - Name: "rate_limit", - Categories: []string{ - "Utility", - }, - Summary: ` -Throttles the throughput of a pipeline according to a specified -` + "[`rate_limit`](/docs/components/rate_limits/about)" + ` resource. Rate limits are -shared across components and therefore apply globally to all processing -pipelines.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("resource", "The target [`rate_limit` resource](/docs/components/rate_limits/about).").HasDefault(""), - ), - }) + err := service.RegisterBatchProcessor( + "rate_limit", rlimitProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + resStr, err := conf.FieldString(rlimitFieldResource) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + r, err := newRateLimitProc(resStr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedProcessor("rate_limit", r, mgr)), nil + }) if err != nil { panic(err) } @@ -48,12 +57,12 @@ type rateLimitProc struct { closeOnce sync.Once } -func newRateLimitProc(conf processor.RateLimitConfig, mgr bundle.NewManagement) (*rateLimitProc, error) { - if !mgr.ProbeRateLimit(conf.Resource) { - return nil, fmt.Errorf("rate limit resource '%v' was not found", conf.Resource) +func newRateLimitProc(resStr string, mgr bundle.NewManagement) (*rateLimitProc, error) { + if !mgr.ProbeRateLimit(resStr) { + return nil, fmt.Errorf("rate limit resource '%v' was not found", resStr) } r := &rateLimitProc{ - rlName: conf.Resource, + rlName: 
resStr, mgr: mgr, closeChan: make(chan struct{}), } diff --git a/internal/impl/pure/processor_rate_limit_test.go b/internal/impl/pure/processor_rate_limit_test.go index 45d5e7f851..754e46364c 100644 --- a/internal/impl/pure/processor_rate_limit_test.go +++ b/internal/impl/pure/processor_rate_limit_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -27,9 +28,12 @@ func TestRateLimitBasic(t *testing.T) { mgr := mock.NewManager() mgr.RateLimits["foo"] = rlFn - conf := processor.NewConfig() - conf.Type = "rate_limit" - conf.RateLimit.Resource = "foo" + conf, err := processor.FromYAML(` +rate_limit: + resource: foo +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) @@ -67,9 +71,12 @@ func TestRateLimitErroredOut(t *testing.T) { mgr := mock.NewManager() mgr.RateLimits["foo"] = rlFn - conf := processor.NewConfig() - conf.Type = "rate_limit" - conf.RateLimit.Resource = "foo" + conf, err := processor.FromYAML(` +rate_limit: + resource: foo +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) @@ -117,9 +124,12 @@ func TestRateLimitBlocked(t *testing.T) { mgr := mock.NewManager() mgr.RateLimits["foo"] = rlFn - conf := processor.NewConfig() - conf.Type = "rate_limit" - conf.RateLimit.Resource = "foo" + conf, err := processor.FromYAML(` +rate_limit: + resource: foo +`) + require.NoError(t, err) + proc, err := mgr.NewProcessor(conf) if err != nil { t.Fatal(err) diff --git a/internal/impl/pure/processor_resource.go b/internal/impl/pure/processor_resource.go index d132d15713..74d32c510e 100644 --- a/internal/impl/pure/processor_resource.go +++ b/internal/impl/pure/processor_resource.go @@ -5,37 +5,33 @@ import ( "fmt" "github.com/benthosdev/benthos/v4/internal/bundle" + 
"github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - return newResourceProcessor(conf, mgr, mgr.Logger()) - }, docs.ComponentSpec{ - Name: "resource", - Categories: []string{ - "Utility", - }, - Summary: ` -Resource is a processor type that runs a processor resource identified by its label.`, - Description: ` + err := service.RegisterBatchProcessor("resource", service.NewConfigSpec(). + Stable(). + Categories("Utility"). + Summary("Resource is a processor type that runs a processor resource identified by its label."). + Description(` This processor allows you to reference the same configured processor resource in multiple places, and can also tidy up large nested configs. For example, the config: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - mapping: | root.message = this root.meta.link_count = this.links.length() root.user.age = this.user.age.number() -` + "```" + ` +`+"```"+` Is equivalent to: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - resource: foo_proc @@ -46,11 +42,21 @@ processor_resources: root.message = this root.meta.link_count = this.links.length() root.user.age = this.user.age.number() -` + "```" + ` +`+"```"+` -You can find out more about resources [in this document.](/docs/configuration/resources)`, - Config: docs.FieldString("", "").HasDefault(""), - }) +You can find out more about resources [in this document.](/docs/configuration/resources)`). 
+ Field(service.NewStringField("").Default("")), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + name, err := conf.FieldString() + if err != nil { + return nil, err + } + p, err := newResourceProcessor(name, interop.UnwrapManagement(mgr)) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } @@ -62,14 +68,14 @@ type resourceProcessor struct { log log.Modular } -func newResourceProcessor(conf processor.Config, mgr bundle.NewManagement, log log.Modular) (*resourceProcessor, error) { - if !mgr.ProbeProcessor(conf.Resource) { - return nil, fmt.Errorf("processor resource '%v' was not found", conf.Resource) +func newResourceProcessor(name string, mgr bundle.NewManagement) (*resourceProcessor, error) { + if !mgr.ProbeProcessor(name) { + return nil, fmt.Errorf("processor resource '%v' was not found", name) } return &resourceProcessor{ mgr: mgr, - name: conf.Resource, - log: log, + name: name, + log: mgr.Logger(), }, nil } diff --git a/internal/impl/pure/processor_resource_test.go b/internal/impl/pure/processor_resource_test.go index 4221a0b54f..406a836d6c 100644 --- a/internal/impl/pure/processor_resource_test.go +++ b/internal/impl/pure/processor_resource_test.go @@ -14,7 +14,7 @@ import ( func TestResourceProc(t *testing.T) { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = `root = "foo: " + content()` + conf.Plugin = `root = "foo: " + content()` mgr := mock.NewManager() @@ -33,7 +33,7 @@ func TestResourceProc(t *testing.T) { nConf := processor.NewConfig() nConf.Type = "resource" - nConf.Resource = "foo" + nConf.Plugin = "foo" p, err := mgr.NewProcessor(nConf) if err != nil { @@ -55,7 +55,7 @@ func TestResourceProc(t *testing.T) { func TestResourceBadName(t *testing.T) { conf := processor.NewConfig() conf.Type = "resource" - conf.Resource = "foo" + conf.Plugin = "foo" _, err := mock.NewManager().NewProcessor(conf) if err == nil 
{ diff --git a/internal/impl/pure/processor_select_parts.go b/internal/impl/pure/processor_select_parts.go index 58d898bb71..02047995f9 100644 --- a/internal/impl/pure/processor_select_parts.go +++ b/internal/impl/pure/processor_select_parts.go @@ -3,45 +3,46 @@ package pure import ( "context" - "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + spFieldParts = "parts" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newSelectParts(conf.SelectParts, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("select_parts", p, mgr), nil - }, docs.ComponentSpec{ - Name: "select_parts", - Categories: []string{ - "Utility", - }, - Summary: ` -Cherry pick a set of messages from a batch by their index. Indexes larger than -the number of messages are simply ignored.`, - Description: ` -The selected parts are added to the new message batch in the same order as the -selection array. E.g. with 'parts' set to [ 2, 0, 1 ] and the message parts -[ '0', '1', '2', '3' ], the output will be [ '2', '0', '1' ]. + err := service.RegisterBatchProcessor( + "select_parts", service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary("Cherry pick a set of messages from a batch by their index. Indexes larger than the number of messages are simply ignored."). + Description(` +The selected parts are added to the new message batch in the same order as the selection array. E.g. with 'parts' set to [ 2, 0, 1 ] and the message parts [ '0', '1', '2', '3' ], the output will be [ '2', '0', '1' ]. 
+ +If none of the selected parts exist in the input batch (resulting in an empty output message) the batch is dropped entirely. + +Message indexes can be negative, and if so the part will be selected from the end counting backwards starting from -1. E.g. if index = -1 then the selected part will be the last part of the message, if index = -2 then the part before the last element will be selected, and so on. -If none of the selected parts exist in the input batch (resulting in an empty -output message) the batch is dropped entirely. +This processor is only applicable to [batched messages](/docs/configuration/batching).`). + Field(service.NewIntListField(spFieldParts). + Description(`An array of message indexes of a batch. Indexes can be negative, and if so the part will be selected from the end counting backwards starting from -1.`). + Default([]any{})), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + partIndexes, err := conf.FieldIntList(spFieldParts) + if err != nil { + return nil, err + } -Message indexes can be negative, and if so the part will be selected from the -end counting backwards starting from -1. E.g. if index = -1 then the selected -part will be the last part of the message, if index = -2 then the part before -the last element with be selected, and so on. + proc, err := newSelectParts(partIndexes) + if err != nil { + return nil, err + }
Indexes can be negative, and if so the part will be selected from the end counting backwards starting from -1.`).Array().HasDefault([]any{}), - ), - }) + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("select_parts", proc, interop.UnwrapManagement(mgr))), nil + }) if err != nil { panic(err) } @@ -51,9 +52,9 @@ type selectPartsProc struct { parts []int } -func newSelectParts(conf processor.SelectPartsConfig, mgr bundle.NewManagement) (*selectPartsProc, error) { +func newSelectParts(parts []int) (*selectPartsProc, error) { return &selectPartsProc{ - parts: conf.Parts, + parts: parts, }, nil } diff --git a/internal/impl/pure/processor_select_parts_test.go b/internal/impl/pure/processor_select_parts_test.go index 40dca80cc0..bf240d66a6 100644 --- a/internal/impl/pure/processor_select_parts_test.go +++ b/internal/impl/pure/processor_select_parts_test.go @@ -2,18 +2,23 @@ package pure_test import ( "context" + "fmt" "reflect" "testing" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" "github.com/benthosdev/benthos/v4/internal/message" ) func TestSelectParts(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "select_parts" - conf.SelectParts.Parts = []int{1, 3} + conf, err := processor.FromYAML(` +select_parts: + parts: [ 1, 3 ] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -79,10 +84,6 @@ func TestSelectParts(t *testing.T) { } func TestSelectPartsIndexBounds(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "select_parts" - conf.SelectParts.Parts = []int{1, 3} - input := [][]byte{ []byte("0"), []byte("1"), @@ -105,7 +106,12 @@ func TestSelectPartsIndexBounds(t *testing.T) { } for i, exp := range tests { - conf.SelectParts.Parts = []int{i} + conf, err := processor.FromYAML(fmt.Sprintf(` +select_parts: + parts: [ %v ] +`, i)) + 
require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) if err != nil { t.Fatal(err) @@ -124,9 +130,11 @@ func TestSelectPartsIndexBounds(t *testing.T) { } func TestSelectPartsEmpty(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "select_parts" - conf.SelectParts.Parts = []int{3} + conf, err := processor.FromYAML(` +select_parts: + parts: [ 3 ] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_sleep.go b/internal/impl/pure/processor_sleep.go index ed309576ca..47c0544235 100644 --- a/internal/impl/pure/processor_sleep.go +++ b/internal/impl/pure/processor_sleep.go @@ -9,29 +9,37 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/field" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + spFieldDuration = "duration" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newSleep(conf.Sleep, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("sleep", p, mgr), nil - }, docs.ComponentSpec{ - Name: "sleep", - Categories: []string{ - "Utility", - }, - Summary: `Sleep for a period of time specified as a duration string for each message. 
This processor will interpolate functions within the ` + "`duration`" + ` field, you can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldInterpolatedString("duration", "The duration of time to sleep for each execution.").HasDefault(""), - ), - }) + err := service.RegisterBatchProcessor("sleep", service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary(`Sleep for a period of time specified as a duration string for each message. This processor will interpolate functions within the `+"`duration`"+` field, you can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`). + Field(service.NewInterpolatedStringField(spFieldDuration). + Description("The duration of time to sleep for each execution.")), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + sleepStr, err := conf.FieldString(spFieldDuration) + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p, err := newSleep(sleepStr, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("sleep", p, mgr)), nil + }) if err != nil { panic(err) } @@ -44,8 +52,8 @@ type sleepProc struct { log log.Modular } -func newSleep(conf processor.SleepConfig, mgr bundle.NewManagement) (*sleepProc, error) { - durationStr, err := mgr.BloblEnvironment().NewField(conf.Duration) +func newSleep(sleepStr string, mgr bundle.NewManagement) (*sleepProc, error) { + durationStr, err := mgr.BloblEnvironment().NewField(sleepStr) if err != nil { return nil, fmt.Errorf("failed to parse duration expression: %v", err) } diff --git a/internal/impl/pure/processor_sleep_test.go b/internal/impl/pure/processor_sleep_test.go index 4c8d146111..d228d7227e 100644 --- a/internal/impl/pure/processor_sleep_test.go +++ b/internal/impl/pure/processor_sleep_test.go @@ -14,9 +14,11 
@@ import ( ) func TestSleep(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "sleep" - conf.Sleep.Duration = "1ns" + conf, err := processor.FromYAML(` +sleep: + duration: 1ns +`) + require.NoError(t, err) slp, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -32,9 +34,11 @@ func TestSleep(t *testing.T) { } func TestSleepExit(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "sleep" - conf.Sleep.Duration = "10s" + conf, err := processor.FromYAML(` +sleep: + duration: 10s +`) + require.NoError(t, err) slp, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -59,9 +63,11 @@ func TestSleepExit(t *testing.T) { } func TestSleep200Millisecond(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "sleep" - conf.Sleep.Duration = "200ms" + conf, err := processor.FromYAML(` +sleep: + duration: 200ms +`) + require.NoError(t, err) slp, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -80,9 +86,11 @@ func TestSleep200Millisecond(t *testing.T) { } func TestSleepInterpolated(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "sleep" - conf.Sleep.Duration = "${!json(\"foo\")}ms" + conf, err := processor.FromYAML(` +sleep: + duration: '${!json("foo")}ms' +`) + require.NoError(t, err) slp, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_split.go b/internal/impl/pure/processor_split.go index d9006b6f58..4c8a9e3b95 100644 --- a/internal/impl/pure/processor_split.go +++ b/internal/impl/pure/processor_split.go @@ -3,36 +3,49 @@ package pure import ( "context" - "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + 
splitPFieldSize = "size" + splitPFieldByteSize = "byte_size" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newSplit(conf.Split, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("split", p, mgr), nil - }, docs.ComponentSpec{ - Name: "split", - Categories: []string{ - "Utility", - }, - Summary: ` -Breaks message batches (synonymous with multiple part messages) into smaller batches. The size of the resulting batches are determined either by a discrete size or, if the field ` + "`byte_size`" + ` is non-zero, then by total size in bytes (which ever limit is reached first).`, - Description: ` -This processor is for breaking batches down into smaller ones. In order to break a single message out into multiple messages use the ` + "[`unarchive` processor](/docs/components/processors/unarchive)" + `. + err := service.RegisterBatchProcessor( + "split", service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary(`Breaks message batches (synonymous with multiple part messages) into smaller batches. The size of the resulting batches are determined either by a discrete size or, if the field `+"`byte_size`"+` is non-zero, then by total size in bytes (which ever limit is reached first).`). + Description(` +This processor is for breaking batches down into smaller ones. In order to break a single message out into multiple messages use the `+"[`unarchive` processor](/docs/components/processors/unarchive)"+`. -If there is a remainder of messages after splitting a batch the remainder is also sent as a single batch. 
For example, if your target size was 10, and the processor received a batch of 95 message parts, the result would be 9 batches of 10 messages followed by a batch of 5 messages.`, - Config: docs.FieldComponent().WithChildren( - docs.FieldInt("size", "The target number of messages.").HasDefault(1), - docs.FieldInt("byte_size", "An optional target of total message bytes.").HasDefault(0), - ), - }) +If there is a remainder of messages after splitting a batch the remainder is also sent as a single batch. For example, if your target size was 10, and the processor received a batch of 95 message parts, the result would be 9 batches of 10 messages followed by a batch of 5 messages.`). + Fields( + service.NewIntField(splitPFieldSize). + Description("The target number of messages."). + Default(1), + service.NewIntField(splitPFieldByteSize). + Description("An optional target of total message bytes."). + Default(0), + ), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + mgr := interop.UnwrapManagement(res) + s := &splitProc{log: mgr.Logger()} + + var err error + if s.size, err = conf.FieldInt(splitPFieldSize); err != nil { + return nil, err + } + if s.byteSize, err = conf.FieldInt(splitPFieldByteSize); err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("split", s, mgr)), nil + }) if err != nil { panic(err) } @@ -45,14 +58,6 @@ type splitProc struct { byteSize int } -func newSplit(conf processor.SplitConfig, mgr bundle.NewManagement) (*splitProc, error) { - return &splitProc{ - log: mgr.Logger(), - size: conf.Size, - byteSize: conf.ByteSize, - }, nil -} - func (s *splitProc) ProcessBatch(ctx *processor.BatchProcContext, msg message.Batch) ([]message.Batch, error) { if msg.Len() == 0 { return nil, nil diff --git a/internal/impl/pure/processor_split_test.go b/internal/impl/pure/processor_split_test.go index 636f29230c..66d850d18f 100644 --- 
a/internal/impl/pure/processor_split_test.go +++ b/internal/impl/pure/processor_split_test.go @@ -4,6 +4,8 @@ import ( "context" "testing" + "github.com/stretchr/testify/require" + "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" "github.com/benthosdev/benthos/v4/internal/message" @@ -58,9 +60,11 @@ func TestSplitToSingleParts(t *testing.T) { } func TestSplitToMultipleParts(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "split" - conf.Split.Size = 2 + conf, err := processor.FromYAML(` +split: + size: 2 +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -95,10 +99,12 @@ func TestSplitToMultipleParts(t *testing.T) { } func TestSplitByBytes(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "split" - conf.Split.Size = 0 - conf.Split.ByteSize = 6 + conf, err := processor.FromYAML(` +split: + size: 0 + byte_size: 6 +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -132,10 +138,12 @@ func TestSplitByBytes(t *testing.T) { } func TestSplitByBytesTooLarge(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "split" - conf.Split.Size = 0 - conf.Split.ByteSize = 2 + conf, err := processor.FromYAML(` +split: + size: 0 + byte_size: 2 +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_switch.go b/internal/impl/pure/processor_switch.go index b3471e6c59..f512be3372 100644 --- a/internal/impl/pure/processor_switch.go +++ b/internal/impl/pure/processor_switch.go @@ -2,64 +2,42 @@ package pure import ( "context" + "errors" "fmt" "sort" - "strconv" "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - 
"github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newSwitchProc(conf.Switch, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("switch", p, mgr), nil - }, docs.ComponentSpec{ - Name: "switch", - Categories: []string{ - "Composition", - }, - Summary: ` -Conditionally processes messages based on their contents.`, - Description: ` -For each switch case a [Bloblang query](/docs/guides/bloblang/about) is checked and, if the result is true (or the check is empty) the child processors are executed on the message.`, - Footnotes: ` +const ( + spFieldCheck = "check" + spFieldProcessors = "processors" + spFieldFallthrough = "fallthrough" +) + +func switchProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary(`Conditionally processes messages based on their contents.`). + Description(`For each switch case a [Bloblang query](/docs/guides/bloblang/about) is checked and, if the result is true (or the check is empty) the child processors are executed on the message.`). + Footnotes(` ## Batching When a switch processor executes on a [batch of messages](/docs/configuration/batching) they are checked individually and can be matched independently against cases. During processing the messages matched against a case are processed as a batch, although the ordering of messages during case processing cannot be guaranteed to match the order as received. -At the end of switch processing the resulting batch will follow the same ordering as the batch was received. 
If any child processors have split or otherwise grouped messages this grouping will be lost as the result of a switch is always a single batch. In order to perform conditional grouping and/or splitting use the [` + "`group_by`" + ` processor](/docs/components/processors/group_by).`, - Config: docs.FieldComponent().Array().WithChildren( - docs.FieldBloblang( - "check", - "A [Bloblang query](/docs/guides/bloblang/about) that should return a boolean value indicating whether a message should have the processors of this case executed on it. If left empty the case always passes. If the check mapping throws an error the message will be flagged [as having failed](/docs/configuration/error_handling) and will not be tested against any other cases.", - `this.type == "foo"`, - `this.contents.urls.contains("https://benthos.dev/")`, - ).HasDefault(""), - docs.FieldProcessor( - "processors", - "A list of [processors](/docs/components/processors/about/) to execute on a message.", - ).HasDefault([]any{}).Array(), - docs.FieldBool( - "fallthrough", - "Indicates whether, if this case passes for a message, the next case should also be executed.", - ).HasDefault(false).Advanced(), - ), - Examples: []docs.AnnotatedExample{ - { - Title: "I Hate George", - Summary: ` +At the end of switch processing the resulting batch will follow the same ordering as the batch was received. If any child processors have split or otherwise grouped messages this grouping will be lost as the result of a switch is always a single batch. In order to perform conditional grouping and/or splitting use the [`+"`group_by`"+` processor](/docs/components/processors/group_by).`). + Example("I Hate George", ` We have a system where we're counting a metric for all messages that pass through our system. However, occasionally we get messages from George where he's rambling about dumb stuff we don't care about. 
For Georges messages we want to instead emit a metric that gauges how angry he is about being ignored and then we drop it.`, - Config: ` + ` pipeline: processors: - switch: @@ -76,9 +54,45 @@ pipeline: value: ${! json("user.anger") } - mapping: root = deleted() `, - }, - }, - }) + ). + Field(service.NewObjectListField("", + service.NewBloblangField(spFieldCheck). + Description("A [Bloblang query](/docs/guides/bloblang/about) that should return a boolean value indicating whether a message should have the processors of this case executed on it. If left empty the case always passes. If the check mapping throws an error the message will be flagged [as having failed](/docs/configuration/error_handling) and will not be tested against any other cases."). + Examples( + `this.type == "foo"`, + `this.contents.urls.contains("https://benthos.dev/")`, + ). + Default(""), + service.NewProcessorListField(spFieldProcessors). + Description("A list of [processors](/docs/components/processors/about/) to execute on a message."). + Default([]any{}), + service.NewBoolField(spFieldFallthrough). + Description("Indicates whether, if this case passes for a message, the next case should also be executed."). + Advanced(). 
+ Default(false), + )) +} + +func init() { + err := service.RegisterBatchProcessor( + "switch", switchProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + caseConfs, err := conf.FieldObjectList() + if err != nil { + return nil, err + } + + mgr := interop.UnwrapManagement(res) + p := &switchProc{log: mgr.Logger()} + p.cases = make([]switchCase, len(caseConfs)) + for i, c := range caseConfs { + if p.cases[i], err = switchCaseFromParsed(c, mgr); err != nil { + return nil, fmt.Errorf("case '%v' parse error: %w", i, err) + } + } + + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("switch", p, mgr)), nil + }) if err != nil { panic(err) } @@ -92,47 +106,34 @@ type switchCase struct { fallThrough bool } -type switchProc struct { - cases []switchCase - log log.Modular -} - -func newSwitchProc(conf processor.SwitchConfig, mgr bundle.NewManagement) (*switchProc, error) { - var cases []switchCase - for i, caseConf := range conf { - var err error - var check *mapping.Executor - var procs []processor.V1 - - if len(caseConf.Check) > 0 { - if check, err = mgr.BloblEnvironment().NewMapping(caseConf.Check); err != nil { - return nil, fmt.Errorf("failed to parse case %v check: %w", i, err) - } +func switchCaseFromParsed(conf *service.ParsedConfig, mgr bundle.NewManagement) (c switchCase, err error) { + if checkStr, _ := conf.FieldString(spFieldCheck); checkStr != "" { + if c.check, err = mgr.BloblEnvironment().NewMapping(checkStr); err != nil { + return } + } - if len(caseConf.Processors) == 0 { - return nil, fmt.Errorf("case [%v] has no processors, in order to have a no-op case use a `noop` processor", i) - } + c.fallThrough, _ = conf.FieldBool(spFieldFallthrough) - for j, procConf := range caseConf.Processors { - pMgr := mgr.IntoPath("switch", strconv.Itoa(i), "processors", strconv.Itoa(j)) - proc, err := pMgr.NewProcessor(procConf) - if err != nil { - return nil, fmt.Errorf("case [%v] 
processor [%v]: %w", i, j, err) - } - procs = append(procs, proc) - } + var iProcs []*service.OwnedProcessor + if iProcs, err = conf.FieldProcessorList(spFieldProcessors); err != nil { + return + } + if len(iProcs) == 0 { + err = errors.New("case has no processors, in order to have a no-op case use a `noop` processor") + return + } - cases = append(cases, switchCase{ - check: check, - processors: procs, - fallThrough: caseConf.Fallthrough, - }) + c.processors = make([]processor.V1, len(iProcs)) + for i, proc := range iProcs { + c.processors[i] = interop.UnwrapOwnedProcessor(proc) } - return &switchProc{ - cases: cases, - log: mgr.Logger(), - }, nil + return +} + +type switchProc struct { + cases []switchCase + log log.Modular } // SwitchReorderFromGroup takes a message sort group and rearranges a slice of diff --git a/internal/impl/pure/processor_switch_test.go b/internal/impl/pure/processor_switch_test.go index 262c31aea0..e66e79c431 100644 --- a/internal/impl/pure/processor_switch_test.go +++ b/internal/impl/pure/processor_switch_test.go @@ -17,38 +17,20 @@ import ( ) func TestSwitchCases(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "switch" - - procConf := processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 0: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("A")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 1: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("B")`, - Processors: []processor.Config{procConf}, - Fallthrough: true, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 2: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: 
`content().contains("C")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) + conf, err := processor.FromYAML(` +switch: + - check: 'content().contains("A")' + processors: + - bloblang: 'root = "Hit case 0: " + content().string()' + - check: 'content().contains("B")' + processors: + - bloblang: 'root = "Hit case 1: " + content().string()' + fallthrough: true + - check: 'content().contains("C")' + processors: + - bloblang: 'root = "Hit case 2: " + content().string()' +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -134,28 +116,16 @@ func TestSwitchCases(t *testing.T) { } func TestSwitchError(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "switch" - - procConf := processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 0: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `this.id.not_empty().contains("foo")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 1: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `this.content.contains("bar")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) + conf, err := processor.FromYAML(` +switch: + - check: 'this.id.not_empty().contains("foo")' + processors: + - bloblang: 'root = "Hit case 0: " + content().string()' + - check: 'this.content.contains("bar")' + processors: + - bloblang: 'root = "Hit case 1: " + content().string()' +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -195,38 +165,20 @@ func TestSwitchError(t *testing.T) { } func BenchmarkSwitch10(b *testing.B) { - conf := processor.NewConfig() - conf.Type = "switch" - - procConf := processor.NewConfig() - procConf.Type = "bloblang" - 
procConf.Bloblang = `root = "Hit case 0: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("A")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 1: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("B")`, - Processors: []processor.Config{procConf}, - Fallthrough: true, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 2: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("C")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) + conf, err := processor.FromYAML(` +switch: + - check: 'content().contains("A")' + processors: + - bloblang: 'root = "Hit case 0: " + content().string()' + - check: 'content().contains("B")' + processors: + - bloblang: 'root = "Hit case 1: " + content().string()' + fallthrough: true + - check: 'content().contains("C")' + processors: + - bloblang: 'root = "Hit case 2: " + content().string()' +`) + require.NoError(b, err) c, err := mock.NewManager().NewProcessor(conf) require.NoError(b, err) @@ -272,38 +224,20 @@ func BenchmarkSwitch10(b *testing.B) { } func BenchmarkSwitch1(b *testing.B) { - conf := processor.NewConfig() - conf.Type = "switch" - - procConf := processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 0: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("A")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 1: " + content().string()` - - conf.Switch = append(conf.Switch, 
processor.SwitchCaseConfig{ - Check: `content().contains("B")`, - Processors: []processor.Config{procConf}, - Fallthrough: true, - }) - - procConf = processor.NewConfig() - procConf.Type = "bloblang" - procConf.Bloblang = `root = "Hit case 2: " + content().string()` - - conf.Switch = append(conf.Switch, processor.SwitchCaseConfig{ - Check: `content().contains("C")`, - Processors: []processor.Config{procConf}, - Fallthrough: false, - }) + conf, err := processor.FromYAML(` +switch: + - check: 'content().contains("A")' + processors: + - bloblang: 'root = "Hit case 0: " + content().string()' + - check: 'content().contains("B")' + processors: + - bloblang: 'root = "Hit case 1: " + content().string()' + fallthrough: true + - check: 'content().contains("C")' + processors: + - bloblang: 'root = "Hit case 2: " + content().string()' +`) + require.NoError(b, err) c, err := mock.NewManager().NewProcessor(conf) require.NoError(b, err) diff --git a/internal/impl/pure/processor_sync_response.go b/internal/impl/pure/processor_sync_response.go index 3a3c38abe2..8c323d6fc4 100644 --- a/internal/impl/pure/processor_sync_response.go +++ b/internal/impl/pure/processor_sync_response.go @@ -3,34 +3,27 @@ package pure import ( "context" - "github.com/benthosdev/benthos/v4/internal/bundle" - "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" "github.com/benthosdev/benthos/v4/internal/transaction" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - return &syncResponseProc{log: mgr.Logger()}, nil - }, docs.ComponentSpec{ - Name: "sync_response", - Categories: []string{ - "Utility", - }, - Summary: ` -Adds the payload in its current state 
as a synchronous response to the input -source, where it is dealt with according to that specific input type.`, - Description: ` -For most inputs this mechanism is ignored entirely, in which case the sync -response is dropped without penalty. It is therefore safe to use this processor -even when combining input types that might not have support for sync responses. -An example of an input able to utilise this is the ` + "`http_server`" + `. + err := service.RegisterBatchProcessor("sync_response", service.NewConfigSpec(). + Categories("Utility"). + Stable(). + Summary("Adds the payload in its current state as a synchronous response to the input source, where it is dealt with according to that specific input type."). + Description(` +For most inputs this mechanism is ignored entirely, in which case the sync response is dropped without penalty. It is therefore safe to use this processor even when combining input types that might not have support for sync responses. An example of an input able to utilise this is the `+"`http_server`"+`. -For more information please read [Synchronous Responses](/docs/guides/sync_responses).`, - Config: docs.FieldObject("", "").HasDefault(map[string]any{}), - }) +For more information please read [Synchronous Responses](/docs/guides/sync_responses).`). 
+ Field(service.NewObjectField("").Default(map[string]any{})), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + p := &syncResponseProc{log: interop.UnwrapManagement(mgr).Logger()} + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } diff --git a/internal/impl/pure/processor_try.go b/internal/impl/pure/processor_try.go index 8c4f47c377..ce9e2f6072 100644 --- a/internal/impl/pure/processor_try.go +++ b/internal/impl/pure/processor_try.go @@ -2,34 +2,26 @@ package pure import ( "context" - "strconv" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newTryProc(conf.Try, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("try", p, mgr), nil - }, docs.ComponentSpec{ - Name: "try", - Categories: []string{ - "Composition", - }, - Summary: `Executes a list of child processors on messages only if no prior processors have failed (or the errors have been cleared).`, - Description: ` -This processor behaves similarly to the ` + "[`for_each`](/docs/components/processors/for_each)" + ` processor, where a list of child processors are applied to individual messages of a batch. However, if a message has failed any prior processor (before or during the try block) then that message will skip all following processors. + err := service.RegisterBatchProcessor("try", service.NewConfigSpec(). + Stable(). + Categories("Composition"). 
+ Summary("Executes a list of child processors on messages only if no prior processors have failed (or the errors have been cleared)."). + Description(` +This processor behaves similarly to the `+"[`for_each`](/docs/components/processors/for_each)"+` processor, where a list of child processors are applied to individual messages of a batch. However, if a message has failed any prior processor (before or during the try block) then that message will skip all following processors. For example, with the following config: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - resource: foo @@ -37,21 +29,21 @@ pipeline: - resource: bar - resource: baz - resource: buz -` + "```" + ` +`+"```"+` -If the processor ` + "`bar`" + ` fails for a particular message, that message will skip the processors ` + "`baz` and `buz`" + `. Similarly, if ` + "`bar`" + ` succeeds but ` + "`baz`" + ` does not then ` + "`buz`" + ` will be skipped. If the processor ` + "`foo`" + ` fails for a message then none of ` + "`bar`, `baz` or `buz`" + ` are executed on that message. +If the processor `+"`bar`"+` fails for a particular message, that message will skip the processors `+"`baz` and `buz`"+`. Similarly, if `+"`bar`"+` succeeds but `+"`baz`"+` does not then `+"`buz`"+` will be skipped. If the processor `+"`foo`"+` fails for a message then none of `+"`bar`, `baz` or `buz`"+` are executed on that message. -This processor is useful for when child processors depend on the successful output of previous processors. This processor can be followed with a ` + "[catch](/docs/components/processors/catch)" + ` processor for defining child processors to be applied only to failed messages. +This processor is useful for when child processors depend on the successful output of previous processors. This processor can be followed with a `+"[catch](/docs/components/processors/catch)"+` processor for defining child processors to be applied only to failed messages. 
More information about error handing can be found [here](/docs/configuration/error_handling). ### Nesting within a catch block -In some cases it might be useful to nest a try block within a catch block, since the ` + "[`catch` processor](/docs/components/processors/catch)" + ` only clears errors _after_ executing its child processors this means a nested try processor will not execute unless the errors are explicitly cleared beforehand. +In some cases it might be useful to nest a try block within a catch block, since the `+"[`catch` processor](/docs/components/processors/catch)"+` only clears errors _after_ executing its child processors this means a nested try processor will not execute unless the errors are explicitly cleared beforehand. This can be done by inserting an empty catch block before the try block like as follows: -` + "```yaml" + ` +`+"```yaml"+` pipeline: processors: - resource: foo @@ -63,12 +55,28 @@ pipeline: - try: - resource: bar - resource: baz -` + "```" + ` - - -`, - Config: docs.FieldProcessor("", "").Array().HasDefault([]any{}), - }) +`+"```"+``). 
+ Field(service.NewProcessorListField("").Default([]any{})), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + mgr := interop.UnwrapManagement(res) + childPubProcs, err := conf.FieldProcessorList() + if err != nil { + return nil, err + } + + childProcs := make([]processor.V1, len(childPubProcs)) + for i, p := range childPubProcs { + childProcs[i] = interop.UnwrapOwnedProcessor(p) + } + + tp, err := newTryProc(childProcs, mgr) + if err != nil { + return nil, err + } + + p := processor.NewAutoObservedBatchedProcessor("try", tp, mgr) + return interop.NewUnwrapInternalBatchProcessor(p), nil + }) if err != nil { panic(err) } @@ -79,16 +87,7 @@ type tryProc struct { log log.Modular } -func newTryProc(conf []processor.Config, mgr bundle.NewManagement) (*tryProc, error) { - var children []processor.V1 - for i, pconf := range conf { - pMgr := mgr.IntoPath("try", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, err - } - children = append(children, proc) - } +func newTryProc(children []processor.V1, mgr bundle.NewManagement) (*tryProc, error) { return &tryProc{ children: children, log: mgr.Logger(), diff --git a/internal/impl/pure/processor_try_test.go b/internal/impl/pure/processor_try_test.go index 5154861af4..5e1f49d478 100644 --- a/internal/impl/pure/processor_try_test.go +++ b/internal/impl/pure/processor_try_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -15,8 +16,10 @@ import ( ) func TestTryEmpty(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "try" + conf, err := processor.FromYAML(` +try: [] +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -40,13 +43,11 @@ func TestTryEmpty(t *testing.T) { } func TestTryBasic(t *testing.T) { 
- encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - conf := processor.NewConfig() - conf.Type = "try" - conf.Try = append(conf.Try, encodeConf) + conf, err := processor.FromYAML(` +try: + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -77,13 +78,11 @@ func TestTryBasic(t *testing.T) { } func TestTryFilterSome(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "try" - conf.Try = append(conf.Try, filterConf) + conf, err := processor.FromYAML(` +try: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -113,17 +112,12 @@ func TestTryFilterSome(t *testing.T) { } func TestTryMultiProcs(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root = if batch_index() == 0 { content().encode("base64") }` - - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "try" - conf.Try = append(conf.Try, filterConf, encodeConf) + conf, err := processor.FromYAML(` +try: + - bloblang: 'root = if !content().contains("foo") { deleted() }' + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -153,17 +147,13 @@ func TestTryMultiProcs(t *testing.T) { } func TestTryFailJSON(t *testing.T) { - encodeConf := processor.NewConfig() - encodeConf.Type = "bloblang" - encodeConf.Bloblang = `root 
= if batch_index() == 0 { content().encode("base64") }` - - jmespathConf := processor.NewConfig() - jmespathConf.Type = "jmespath" - jmespathConf.JMESPath.Query = "foo" - - conf := processor.NewConfig() - conf.Type = "try" - conf.Try = append(conf.Try, jmespathConf, encodeConf) + conf, err := processor.FromYAML(` +try: + - jmespath: + query: 'foo' + - bloblang: 'root = if batch_index() == 0 { content().encode("base64") }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -203,13 +193,11 @@ func TestTryFailJSON(t *testing.T) { } func TestTryFilterAll(t *testing.T) { - filterConf := processor.NewConfig() - filterConf.Type = "bloblang" - filterConf.Bloblang = `root = if !content().contains("foo") { deleted() }` - - conf := processor.NewConfig() - conf.Type = "try" - conf.Try = append(conf.Try, filterConf) + conf, err := processor.FromYAML(` +try: + - bloblang: 'root = if !content().contains("foo") { deleted() }' +`) + require.NoError(t, err) proc, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_while.go b/internal/impl/pure/processor_while.go index 55d875a828..d69f75cb9f 100644 --- a/internal/impl/pure/processor_while.go +++ b/internal/impl/pure/processor_while.go @@ -9,47 +9,88 @@ import ( "github.com/benthosdev/benthos/v4/internal/bloblang/mapping" "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" "github.com/benthosdev/benthos/v4/internal/shutdown" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err 
:= newWhile(conf.While, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedBatchedProcessor("while", p, mgr), nil - }, docs.ComponentSpec{ - Name: "while", - Categories: []string{ - "Composition", - }, - Summary: ` -A processor that checks a [Bloblang query](/docs/guides/bloblang/about/) against each batch of messages and executes child processors on them for as long as the query resolves to true.`, - Description: ` -The field ` + "`at_least_once`" + `, if true, ensures that the child processors are always executed at least one time (like a do .. while loop.) - -The field ` + "`max_loops`" + `, if greater than zero, caps the number of loops for a message batch to this value. +const ( + wpFieldAtLeastOnce = "at_least_once" + wpFieldMaxLoops = "max_loops" + wpFieldCheck = "check" + wpFieldProcessors = "processors" +) + +func whileProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary("A processor that checks a [Bloblang query](/docs/guides/bloblang/about/) against each batch of messages and executes child processors on them for as long as the query resolves to true."). + Description(` +The field `+"`at_least_once`"+`, if true, ensures that the child processors are always executed at least one time (like a do .. while loop.) + +The field `+"`max_loops`"+`, if greater than zero, caps the number of loops for a message batch to this value. If following a loop execution the number of messages in a batch is reduced to zero the loop is exited regardless of the condition result. If following a loop execution there are more than 1 message batches the query is checked against the first batch only. -The conditions of this processor are applied across entire message batches. 
You can find out more about batching [in this doc](/docs/configuration/batching).`, - Config: docs.FieldComponent().WithChildren( - docs.FieldBool("at_least_once", "Whether to always run the child processors at least one time."), - docs.FieldInt("max_loops", "An optional maximum number of loops to execute. Helps protect against accidentally creating infinite loops.").Advanced(), - docs.FieldBloblang( - "check", - "A [Bloblang query](/docs/guides/bloblang/about/) that should return a boolean value indicating whether the while loop should execute again.", - `errored()`, - `this.urls.unprocessed.length() > 0`, - ).HasDefault(""), - docs.FieldProcessor("processors", "A list of child processors to execute on each loop.").Array(), - ).ChildDefaultAndTypesFromStruct(processor.NewWhileConfig()), - }) +The conditions of this processor are applied across entire message batches. You can find out more about batching [in this doc](/docs/configuration/batching).`). + Fields( + + service.NewBoolField(wpFieldAtLeastOnce). + Description("Whether to always run the child processors at least one time."). + Default(false), + service.NewIntField(wpFieldMaxLoops). + Description("An optional maximum number of loops to execute. Helps protect against accidentally creating infinite loops."). + Advanced(). + Default(0), + service.NewBloblangField(wpFieldCheck). + Description("A [Bloblang query](/docs/guides/bloblang/about/) that should return a boolean value indicating whether the while loop should execute again."). + Examples(`errored()`, `this.urls.unprocessed.length() > 0`). + Default(""), + service.NewProcessorListField(wpFieldProcessors). 
+ Description("A list of child processors to execute on each loop."), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "while", whileProcSpec(), + func(conf *service.ParsedConfig, res *service.Resources) (service.BatchProcessor, error) { + maxLoops, err := conf.FieldInt(wpFieldMaxLoops) + if err != nil { + return nil, err + } + + atLeastOnce, err := conf.FieldBool(wpFieldAtLeastOnce) + if err != nil { + return nil, err + } + + checkStr, err := conf.FieldString(wpFieldCheck) + if err != nil { + return nil, err + } + + iProcs, err := conf.FieldProcessorList(wpFieldProcessors) + if err != nil { + return nil, err + } + + children := make([]processor.V1, len(iProcs)) + for i, c := range iProcs { + children[i] = interop.UnwrapOwnedProcessor(c) + } + + mgr := interop.UnwrapManagement(res) + p, err := newWhile(maxLoops, atLeastOnce, checkStr, children, mgr) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(processor.NewAutoObservedBatchedProcessor("while", p, mgr)), nil + }) if err != nil { panic(err) } @@ -65,31 +106,21 @@ type whileProc struct { shutSig *shutdown.Signaller } -func newWhile(conf processor.WhileConfig, mgr bundle.NewManagement) (*whileProc, error) { +func newWhile(maxLoops int, atLeastOnce bool, checkStr string, children []processor.V1, mgr bundle.NewManagement) (*whileProc, error) { var check *mapping.Executor var err error - if len(conf.Check) > 0 { - if check, err = mgr.BloblEnvironment().NewMapping(conf.Check); err != nil { + if len(checkStr) > 0 { + if check, err = mgr.BloblEnvironment().NewMapping(checkStr); err != nil { return nil, fmt.Errorf("failed to parse check query: %w", err) } } else { return nil, errors.New("a check query is required") } - var children []processor.V1 - for i, pconf := range conf.Processors { - pMgr := mgr.IntoPath("while", "processors", strconv.Itoa(i)) - proc, err := pMgr.NewProcessor(pconf) - if err != nil { - return nil, err - } - children = append(children, proc) - 
} - return &whileProc{ - maxLoops: conf.MaxLoops, - atLeastOnce: conf.AtLeastOnce, + maxLoops: maxLoops, + atLeastOnce: atLeastOnce, check: check, children: children, log: mgr.Logger(), diff --git a/internal/impl/pure/processor_while_test.go b/internal/impl/pure/processor_while_test.go index c7f2d34613..cb14b12d92 100644 --- a/internal/impl/pure/processor_while_test.go +++ b/internal/impl/pure/processor_while_test.go @@ -26,16 +26,15 @@ func TestWhileErrs(t *testing.T) { } func TestWhileWithCount(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "while" - conf.While.Check = `count("while_test_1") < 3` - - procConf := processor.NewConfig() - procConf.Type = "insert_part" - procConf.InsertPart.Content = "foo" - procConf.InsertPart.Index = 0 - - conf.While.Processors = append(conf.While.Processors, procConf) + conf, err := processor.FromYAML(` +while: + check: 'count("while_test_1") < 3' + processors: + - insert_part: + content: foo + index: 0 +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) @@ -53,16 +52,15 @@ func TestWhileWithCount(t *testing.T) { } func TestWhileWithContentCheck(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "while" - conf.While.Check = "batch_size() <= 3" - - procConf := processor.NewConfig() - procConf.Type = "insert_part" - procConf.InsertPart.Content = "foo" - procConf.InsertPart.Index = 0 - - conf.While.Processors = append(conf.While.Processors, procConf) + conf, err := processor.FromYAML(` +while: + check: 'batch_size() <= 3' + processors: + - insert_part: + content: foo + index: 0 +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -86,17 +84,16 @@ func TestWhileWithContentCheck(t *testing.T) { } func TestWhileWithCountALO(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "while" - conf.While.Check = `count("while_test_2") < 3` - conf.While.AtLeastOnce = true - - procConf := processor.NewConfig() - 
procConf.Type = "insert_part" - procConf.InsertPart.Content = "foo" - procConf.InsertPart.Index = 0 - - conf.While.Processors = append(conf.While.Processors, procConf) + conf, err := processor.FromYAML(` +while: + check: 'count("while_test_2") < 3' + at_least_once: true + processors: + - insert_part: + content: foo + index: 0 +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -120,17 +117,16 @@ func TestWhileWithCountALO(t *testing.T) { } func TestWhileMaxLoops(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "while" - conf.While.MaxLoops = 3 - conf.While.Check = `true` - - procConf := processor.NewConfig() - procConf.Type = "insert_part" - procConf.InsertPart.Content = "foo" - procConf.InsertPart.Index = 0 - - conf.While.Processors = append(conf.While.Processors, procConf) + conf, err := processor.FromYAML(` +while: + check: 'true' + max_loops: 3 + processors: + - insert_part: + content: foo + index: 0 +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { @@ -154,22 +150,17 @@ func TestWhileMaxLoops(t *testing.T) { } func TestWhileWithStaticTrue(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "while" - conf.While.Check = `true` - - procConf := processor.NewConfig() - procConf.Type = "insert_part" - procConf.InsertPart.Content = "foo" - procConf.InsertPart.Index = 0 - - conf.While.Processors = append(conf.While.Processors, procConf) - - procConf = processor.NewConfig() - procConf.Type = "sleep" - procConf.Sleep.Duration = "100ms" - - conf.While.Processors = append(conf.While.Processors, procConf) + conf, err := processor.FromYAML(` +while: + check: 'true' + processors: + - insert_part: + content: 'foo' + index: 0 + - sleep: + duration: 100ms +`) + require.NoError(t, err) c, err := mock.NewManager().NewProcessor(conf) if err != nil { diff --git a/internal/impl/pure/processor_workflow.go b/internal/impl/pure/processor_workflow.go index 12acc1c7a1..e2b8507d1f 
100644 --- a/internal/impl/pure/processor_workflow.go +++ b/internal/impl/pure/processor_workflow.go @@ -11,35 +11,34 @@ import ( "go.opentelemetry.io/otel/trace" "github.com/benthosdev/benthos/v4/internal/bundle" + "github.com/benthosdev/benthos/v4/internal/component/interop" "github.com/benthosdev/benthos/v4/internal/component/metrics" - "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" "github.com/benthosdev/benthos/v4/internal/log" "github.com/benthosdev/benthos/v4/internal/message" "github.com/benthosdev/benthos/v4/internal/tracing" + "github.com/benthosdev/benthos/v4/public/service" ) -func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := NewWorkflow(conf.Workflow, mgr) - return p, err - }, docs.ComponentSpec{ - Name: "workflow", - Categories: []string{ - "Composition", - }, - Status: docs.StatusStable, - Summary: ` -Executes a topology of ` + "[`branch` processors][processors.branch]" + `, -performing them in parallel where possible.`, - Description: ` +const ( + wflowProcFieldMetaPath = "meta_path" + wflowProcFieldOrder = "order" + wflowProcFieldBranchResources = "branch_resources" + wflowProcFieldBranches = "branches" +) + +func workflowProcSpec() *service.ConfigSpec { + return service.NewConfigSpec(). + Categories("Composition"). + Stable(). + Summary(`Executes a topology of `+"[`branch` processors][processors.branch]"+`, performing them in parallel where possible.`). + Description(` ## Why Use a Workflow ### Performance Most of the time the best way to compose processors is also the simplest, just configure them in series. This is because processors are often CPU bound, low-latency, and you can gain vertical scaling by increasing the number of processor pipeline threads, allowing Benthos to process [multiple messages in parallel][configuration.pipelines]. 
-However, some processors such as ` + "[`http`][processors.http], [`aws_lambda`][processors.aws_lambda] or [`cache`][processors.cache]" + ` interact with external services and therefore spend most of their time waiting for a response. These processors tend to be high-latency and low CPU activity, which causes messages to process slowly. +However, some processors such as `+"[`http`][processors.http], [`aws_lambda`][processors.aws_lambda] or [`cache`][processors.cache]"+` interact with external services and therefore spend most of their time waiting for a response. These processors tend to be high-latency and low CPU activity, which causes messages to process slowly. When a processing pipeline contains multiple network processors that aren't dependent on each other we can benefit from performing these processors in parallel for each individual message, reducing the overall message processing latency. @@ -49,31 +48,31 @@ A workflow is often expressed as a [DAG][dag_wiki] of processing stages, where e For example, if we had processing stages A, B, C and D, where stage A could result in either stage B or C being next, always followed by D, it might look something like this: -` + "```text" + ` +`+"```text"+` /--> B --\ A --| |--> D \--> C --/ -` + "```" + ` +`+"```"+` -This flow would be easy to express in a standard Benthos config, we could simply use a ` + "[`switch` processor][processors.switch]" + ` to route to either B or C depending on a condition on the result of A. However, this method of flow control quickly becomes unfeasible as the DAG gets more complicated, imagine expressing this flow using switch processors: +This flow would be easy to express in a standard Benthos config, we could simply use a `+"[`switch` processor][processors.switch]"+` to route to either B or C depending on a condition on the result of A. 
However, this method of flow control quickly becomes unfeasible as the DAG gets more complicated, imagine expressing this flow using switch processors: -` + "```text" + ` +`+"```text"+` /--> B -------------|--> D / / A --| /--> E --| \--> C --| \ \----------|--> F -` + "```" + ` +`+"```"+` -And imagine doing so knowing that the diagram is subject to change over time. Yikes! Instead, with a workflow we can either trust it to automatically resolve the DAG or express it manually as simply as ` + "`order: [ [ A ], [ B, C ], [ E ], [ D, F ] ]`" + `, and the conditional logic for determining if a stage is executed is defined as part of the branch itself.`, - Footnotes: ` +And imagine doing so knowing that the diagram is subject to change over time. Yikes! Instead, with a workflow we can either trust it to automatically resolve the DAG or express it manually as simply as `+"`order: [ [ A ], [ B, C ], [ E ], [ D, F ] ]`"+`, and the conditional logic for determining if a stage is executed is defined as part of the branch itself.`). + Footnotes(` ## Structured Metadata -When the field ` + "`meta_path`" + ` is non-empty the workflow processor creates an object describing which workflows were successful, skipped or failed for each message and stores the object within the message at the end. +When the field `+"`meta_path`"+` is non-empty the workflow processor creates an object describing which workflows were successful, skipped or failed for each message and stores the object within the message at the end. 
The object is of the following form: -` + "```json" + ` +`+"```json"+` { "succeeded": [ "foo" ], "skipped": [ "bar" ], @@ -81,21 +80,21 @@ The object is of the following form: "baz": "the error message from the branch" } } -` + "```" + ` +`+"```"+` If a message already has a meta object at the given path when it is processed then the object is used in order to determine which branches have already been performed on the message (or skipped) and can therefore be skipped on this run. This is a useful pattern when replaying messages that have failed some branches previously. For example, given the above example object the branches foo and bar would automatically be skipped, and baz would be reattempted. -The previous meta object will also be preserved in the field ` + "`.previous`" + ` when the new meta object is written, preserving a full record of all workflow executions. +The previous meta object will also be preserved in the field `+"`.previous`"+` when the new meta object is written, preserving a full record of all workflow executions. -If a field ` + "`.apply`" + ` exists in the meta object for a message and is an array then it will be used as an explicit list of stages to apply, all other stages will be skipped. +If a field `+"`.apply`"+` exists in the meta object for a message and is an array then it will be used as an explicit list of stages to apply, all other stages will be skipped. ## Resources -It's common to configure processors (and other components) [as resources][configuration.resources] in order to keep the pipeline configuration cleaner. With the workflow processor you can include branch processors configured as resources within your workflow either by specifying them by name in the field ` + "`order`" + `, if Benthos doesn't find a branch within the workflow configuration of that name it'll refer to the resources. 
+It's common to configure processors (and other components) [as resources][configuration.resources] in order to keep the pipeline configuration cleaner. With the workflow processor you can include branch processors configured as resources within your workflow either by specifying them by name in the field `+"`order`"+`, if Benthos doesn't find a branch within the workflow configuration of that name it'll refer to the resources. -Alternatively, if you do not wish to have an explicit ordering, you can add resource names to the field ` + "`branch_resources`" + ` and they will be included in the workflow with automatic DAG resolution along with any branches configured in the ` + "`branches`" + ` field. +Alternatively, if you do not wish to have an explicit ordering, you can add resource names to the field `+"`branch_resources`"+` and they will be included in the workflow with automatic DAG resolution along with any branches configured in the `+"`branches`"+` field. ### Resource Error Conditions @@ -109,9 +108,9 @@ The second error case is when automatic DAG resolution is being used and a resou The recommended approach to handle failures within a workflow is to query against the [structured metadata](#structured-metadata) it provides, as it provides granular information about exactly which branches failed and which ones succeeded and therefore aren't necessary to perform again. -For example, if our meta object is stored at the path ` + "`meta.workflow`" + ` and we wanted to check whether a message has failed for any branch we can do that using a [Bloblang query][guides.bloblang] like ` + "`this.meta.workflow.failed.length() | 0 > 0`" + `, or to check whether a specific branch failed we can use ` + "`this.exists(\"meta.workflow.failed.foo\")`" + `. 
+For example, if our meta object is stored at the path `+"`meta.workflow`"+` and we wanted to check whether a message has failed for any branch we can do that using a [Bloblang query][guides.bloblang] like `+"`this.meta.workflow.failed.length() | 0 > 0`"+`, or to check whether a specific branch failed we can use `+"`this.exists(\"meta.workflow.failed.foo\")`"+`. -However, if structured metadata is disabled by setting the field ` + "`meta_path`" + ` to empty then the workflow processor instead adds a general error flag to messages when any executed branch fails. In this case it's possible to handle failures using [standard error handling patterns][configuration.error-handling]. +However, if structured metadata is disabled by setting the field `+"`meta_path`"+` to empty then the workflow processor instead adds a general error flag to messages when any executed branch fails. In this case it's possible to handle failures using [standard error handling patterns][configuration.error-handling]. [dag_wiki]: https://en.wikipedia.org/wiki/Directed_acyclic_graph [processors.switch]: /docs/components/processors/switch @@ -123,13 +122,9 @@ However, if structured metadata is disabled by setting the field ` + "`meta_path [configuration.pipelines]: /docs/configuration/processing_pipelines [configuration.error-handling]: /docs/configuration/error_handling [configuration.resources]: /docs/configuration/resources -`, - Examples: []docs.AnnotatedExample{ - { - Title: "Automatic Ordering", - Summary: ` -When the field ` + "`order`" + ` is omitted a best attempt is made to determine a dependency tree between branches based on their request and result mappings. In the following example the branches foo and bar will be executed first in parallel, and afterwards the branch baz will be executed.`, - Config: ` +`). 
+ Example("Automatic Ordering", ` +When the field `+"`order`"+` is omitted a best attempt is made to determine a dependency tree between branches based on their request and result mappings. In the following example the branches foo and bar will be executed first in parallel, and afterwards the branch baz will be executed.`, ` pipeline: processors: - workflow: @@ -159,13 +154,9 @@ pipeline: operator: set key: ${! json("fooid") } value: ${! json("barstuff") } -`, - }, - { - Title: "Conditional Branches", - Summary: ` -Branches of a workflow are skipped when the ` + "`request_map`" + ` assigns ` + "`deleted()`" + ` to the root. In this example the branch A is executed when the document type is "foo", and branch B otherwise. Branch C is executed afterwards and is skipped unless either A or B successfully provided a result at ` + "`tmp.result`" + `.`, - Config: ` +`). + Example("Conditional Branches", ` +Branches of a workflow are skipped when the `+"`request_map`"+` assigns `+"`deleted()`"+` to the root. In this example the branch A is executed when the document type is "foo", and branch B otherwise. Branch C is executed afterwards and is skipped unless either A or B successfully provided a result at `+"`tmp.result`"+`.`, ` pipeline: processors: - workflow: @@ -199,13 +190,9 @@ pipeline: - http: url: TODO_SOMEWHERE_ELSE result_map: 'root.tmp.result = this' -`, - }, - { - Title: "Resources", - Summary: ` -The ` + "`order`" + ` field can be used in order to refer to [branch processor resources](#resources), this can sometimes make your pipeline configuration cleaner, as well as allowing you to reuse branch configurations in order places. It's also possible to mix and match branches configured within the workflow and configured as resources.`, - Config: ` +`). 
+ Example("Resources", ` +The `+"`order`"+` field can be used in order to refer to [branch processor resources](#resources), this can sometimes make your pipeline configuration cleaner, as well as allowing you to reuse branch configurations in order places. It's also possible to mix and match branches configured within the workflow and configured as resources.`, ` pipeline: processors: - workflow: @@ -238,27 +225,39 @@ processor_resources: operator: set key: ${! json("fooid") } value: ${! json("barstuff") } -`, - }, - }, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("meta_path", "A [dot path](/docs/configuration/field_paths) indicating where to store and reference [structured metadata](#structured-metadata) about the workflow execution.").HasDefault("meta.workflow"), - docs.FieldString( - "order", - "An explicit declaration of branch ordered tiers, which describes the order in which parallel tiers of branches should be executed. Branches should be identified by the name as they are configured in the field `branches`. It's also possible to specify branch processors configured [as a resource](#resources).", - [][]string{{"foo", "bar"}, {"baz"}}, - [][]string{{"foo"}, {"bar"}, {"baz"}}, - ).ArrayOfArrays().HasDefault([]any{}), - docs.FieldString( - "branch_resources", - "An optional list of [`branch` processor](/docs/components/processors/branch) names that are configured as [resources](#resources). These resources will be included in the workflow with any branches configured inline within the [`branches`](#branches) field. The order and parallelism in which branches are executed is automatically resolved based on the mappings of each branch. When using resources with an explicit order it is not necessary to list resources in this field.", - ).AtVersion("3.38.0").Advanced().Array().HasDefault([]any{}), - docs.FieldObject( - "branches", - "An object of named [`branch` processors](/docs/components/processors/branch) that make up the workflow. 
The order and parallelism in which branches are executed can either be made explicit with the field `order`, or if omitted an attempt is made to automatically resolve an ordering based on the mappings of each branch.", - ).Map().WithChildren(branchFields...).HasDefault(map[string]any{}), - ), - }) +`). + Fields( + service.NewStringField(wflowProcFieldMetaPath). + Description("A [dot path](/docs/configuration/field_paths) indicating where to store and reference [structured metadata](#structured-metadata) about the workflow execution."). + Default("meta.workflow"), + service.NewStringListOfListsField(wflowProcFieldOrder). + Description("An explicit declaration of branch ordered tiers, which describes the order in which parallel tiers of branches should be executed. Branches should be identified by the name as they are configured in the field `branches`. It's also possible to specify branch processors configured [as a resource](#resources)."). + Examples( + []any{[]any{"foo", "bar"}, []any{"baz"}}, + []any{[]any{"foo"}, []any{"bar"}, []any{"baz"}}, + ). + Default([]any{}), + service.NewStringListField(wflowProcFieldBranchResources). + Description("An optional list of [`branch` processor](/docs/components/processors/branch) names that are configured as [resources](#resources). These resources will be included in the workflow with any branches configured inline within the [`branches`](#branches) field. The order and parallelism in which branches are executed is automatically resolved based on the mappings of each branch. When using resources with an explicit order it is not necessary to list resources in this field."). + Version("3.38.0"). + Advanced(). + Default([]any{}), + service.NewObjectMapField(wflowProcFieldBranches, branchSpecFields()...). + Description("An object of named [`branch` processors](/docs/components/processors/branch) that make up the workflow. 
The order and parallelism in which branches are executed can either be made explicit with the field `order`, or if omitted an attempt is made to automatically resolve an ordering based on the mappings of each branch."). + Default(map[string]any{}), + ) +} + +func init() { + err := service.RegisterBatchProcessor( + "workflow", workflowProcSpec(), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + w, err := NewWorkflow(conf, interop.UnwrapManagement(mgr)) + if err != nil { + return nil, err + } + return interop.NewUnwrapInternalBatchProcessor(w), nil + }) if err != nil { panic(err) } @@ -287,7 +286,7 @@ type Workflow struct { } // NewWorkflow instanciates a new workflow processor. -func NewWorkflow(conf processor.WorkflowConfig, mgr bundle.NewManagement) (*Workflow, error) { +func NewWorkflow(conf *service.ParsedConfig, mgr bundle.NewManagement) (*Workflow, error) { stats := mgr.Metrics() w := &Workflow{ log: mgr.Logger(), @@ -303,11 +302,15 @@ func NewWorkflow(conf processor.WorkflowConfig, mgr bundle.NewManagement) (*Work mError: stats.GetCounter("processor_error"), mLatency: stats.GetTimer("processor_latency_ns"), } - if len(conf.MetaPath) > 0 { - w.metaPath = gabs.DotPathToSlice(conf.MetaPath) + + metaStr, err := conf.FieldString(wflowProcFieldMetaPath) + if err != nil { + return nil, err + } + if len(metaStr) > 0 { + w.metaPath = gabs.DotPathToSlice(metaStr) } - var err error if w.children, err = newWorkflowBranchMap(conf, mgr); err != nil { return nil, err } diff --git a/internal/impl/pure/processor_workflow_branch_map.go b/internal/impl/pure/processor_workflow_branch_map.go index e410c4d980..c38b507054 100644 --- a/internal/impl/pure/processor_workflow_branch_map.go +++ b/internal/impl/pure/processor_workflow_branch_map.go @@ -11,6 +11,7 @@ import ( "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/component/processor" + "github.com/benthosdev/benthos/v4/public/service" ) 
type workflowBranch interface { @@ -88,14 +89,19 @@ func (w *workflowBranchMap) Close(ctx context.Context) error { var processDAGStageName = regexp.MustCompile("[a-zA-Z0-9-_]+") -func newWorkflowBranchMap(conf processor.WorkflowConfig, mgr bundle.NewManagement) (*workflowBranchMap, error) { +func newWorkflowBranchMap(conf *service.ParsedConfig, mgr bundle.NewManagement) (*workflowBranchMap, error) { + branchObjMap, err := conf.FieldObjectMap(wflowProcFieldBranches) + if err != nil { + return nil, err + } + dynamicBranches, staticBranches := map[string]workflowBranch{}, map[string]*Branch{} - for k, v := range conf.Branches { + for k, v := range branchObjMap { if len(processDAGStageName.FindString(k)) != len(k) { return nil, fmt.Errorf("workflow branch name '%v' contains invalid characters", k) } - child, err := newBranch(v, mgr.IntoPath("workflow", "branches", k)) + child, err := newBranchFromParsed(v, mgr.IntoPath("workflow", "branches", k)) if err != nil { return nil, err } @@ -104,7 +110,11 @@ func newWorkflowBranchMap(conf processor.WorkflowConfig, mgr bundle.NewManagemen staticBranches[k] = child } - for _, k := range conf.BranchResources { + branchResources, err := conf.FieldStringList(wflowProcFieldBranchResources) + if err != nil { + return nil, err + } + for _, k := range branchResources { if _, exists := dynamicBranches[k]; exists { return nil, fmt.Errorf("branch resource name '%v' collides with an explicit branch", k) } @@ -119,7 +129,11 @@ func newWorkflowBranchMap(conf processor.WorkflowConfig, mgr bundle.NewManagemen // When order is specified we infer that names missing from our explicit // branches are resources. 
- for _, tier := range conf.Order { + order, err := conf.FieldStringListOfLists(wflowProcFieldOrder) + if err != nil { + return nil, err + } + for _, tier := range order { for _, k := range tier { if _, exists := dynamicBranches[k]; !exists { if !mgr.ProbeProcessor(k) { @@ -136,8 +150,8 @@ func newWorkflowBranchMap(conf processor.WorkflowConfig, mgr bundle.NewManagemen static := len(dynamicBranches) == len(staticBranches) var dag [][]string - if len(conf.Order) > 0 { - dag = conf.Order + if len(order) > 0 { + dag = order if err := verifyStaticBranchDAG(dag, dynamicBranches); err != nil { return nil, err } diff --git a/internal/impl/pure/processor_workflow_test.go b/internal/impl/pure/processor_workflow_test.go index 6de2e5fa16..a4d9ce794a 100644 --- a/internal/impl/pure/processor_workflow_test.go +++ b/internal/impl/pure/processor_workflow_test.go @@ -3,12 +3,15 @@ package pure_test import ( "context" "errors" + "fmt" "sort" "strconv" + "strings" "sync" "testing" "time" + "github.com/Jeffail/gabs/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -26,7 +29,7 @@ func TestWorkflowDeps(t *testing.T) { branches [][2]string inputOrdering [][]string ordering [][]string - err string + errContains string }{ { branches: [][2]string{ @@ -103,7 +106,7 @@ func TestWorkflowDeps(t *testing.T) { "root.foo = this", }, }, - err: "failed to automatically resolve DAG, circular dependencies detected for branches: [0 1 2]", + errContains: "failed to automatically resolve DAG, circular dependencies detected for branches: [0 1 2]", }, { branches: [][2]string{ @@ -123,7 +126,7 @@ func TestWorkflowDeps(t *testing.T) { inputOrdering: [][]string{ {"1"}, {"0"}, }, - err: "the following branches were missing from order: [2]", + errContains: "the following branches were missing from order: [2]", }, { branches: [][2]string{ @@ -143,7 +146,7 @@ func TestWorkflowDeps(t *testing.T) { inputOrdering: [][]string{ {"1"}, {"0", "2"}, {"1"}, }, - err: "branch specified in 
order listed multiple times: 1", + errContains: "branch specified in order listed multiple times: 1", }, { branches: [][2]string{ @@ -157,7 +160,7 @@ func TestWorkflowDeps(t *testing.T) { }, { `root.bar = this.bar - root.baz = this.baz`, +root.baz = this.baz`, "root.buz = this", }, }, @@ -170,26 +173,42 @@ func TestWorkflowDeps(t *testing.T) { for i, test := range tests { test := test t.Run(strconv.Itoa(i), func(t *testing.T) { - conf := processor.NewConfig() - conf.Workflow.Order = test.inputOrdering + if test.inputOrdering == nil { + test.inputOrdering = [][]string{} + } + confStr := fmt.Sprintf(` +workflow: + order: %v + branches: +`, gabs.Wrap(test.inputOrdering).String()) + for j, mappings := range test.branches { - branchConf := processor.NewBranchConfig() - branchConf.RequestMap = mappings[0] - branchConf.ResultMap = mappings[1] - dudProc := processor.NewConfig() - dudProc.Type = "bloblang" - dudProc.Bloblang = "root = this" - branchConf.Processors = append(branchConf.Processors, dudProc) - conf.Workflow.Branches[strconv.Itoa(j)] = branchConf + confStr += fmt.Sprintf(` + %v: + request_map: | + %v + processors: + - bloblang: root = this + result_map: | + %v +`, + strconv.Itoa(j), + strings.ReplaceAll(mappings[0], "\n", "\n "), + strings.ReplaceAll(mappings[1], "\n", "\n "), + ) } - p, err := pure.NewWorkflow(conf.Workflow, mock.NewManager()) - if len(test.err) > 0 { - assert.EqualError(t, err, test.err) + conf, err := processor.FromYAML(confStr) + require.NoError(t, err) + + p, err := mock.NewManager().NewProcessor(conf) + if len(test.errContains) > 0 { + require.Error(t, err) + assert.Contains(t, err.Error(), test.errContains) } else { require.NoError(t, err) - dag := p.Flow() + dag := p.(*pure.Workflow).Flow() for _, d := range dag { sort.Strings(d) } @@ -207,25 +226,31 @@ func newMockProcProvider(t *testing.T, confs map[string]processor.Config) bundle v.Label = k resConf.ResourceProcessors = append(resConf.ResourceProcessors, v) } - mgr, err := 
manager.New(resConf) require.NoError(t, err) return mgr } -func quickTestBranches(branches ...[4]string) map[string]processor.Config { +func quickTestBranches(t testing.TB, branches ...[4]string) map[string]processor.Config { + t.Helper() m := map[string]processor.Config{} for _, b := range branches { - blobConf := processor.NewConfig() - blobConf.Type = "bloblang" - blobConf.Bloblang = b[2] - - conf := processor.NewConfig() - conf.Type = "branch" - conf.Branch.RequestMap = b[1] - conf.Branch.Processors = append(conf.Branch.Processors, blobConf) - conf.Branch.ResultMap = b[3] + conf, err := processor.FromYAML(fmt.Sprintf(` +branch: + request_map: | + %v + processors: + - bloblang: | + %v + result_map: | + %v +`, + strings.ReplaceAll(b[1], "\n", "\n "), + strings.ReplaceAll(b[2], "\n", "\n "), + strings.ReplaceAll(b[3], "\n", "\n "), + )) + require.NoError(t, err) m[b[0]] = conf } @@ -233,29 +258,34 @@ func quickTestBranches(branches ...[4]string) map[string]processor.Config { } func TestWorkflowMissingResources(t *testing.T) { - conf := processor.NewConfig() - conf.Workflow.Order = [][]string{ - {"foo", "bar", "baz"}, - } - - branchConf := processor.NewConfig() - branchConf.Branch.RequestMap = "root = this" - branchConf.Branch.ResultMap = "root = this" - - blobConf := processor.NewConfig() - blobConf.Type = "bloblang" - blobConf.Bloblang = "root = this" - - branchConf.Branch.Processors = append(branchConf.Branch.Processors, blobConf) + conf, err := processor.FromYAML(` +workflow: + order: [[ foo, bar, baz ]] + branches: + bar: + request_map: root = this + processors: + - bloblang: root = this + result_map: root = this +`) + require.NoError(t, err) - conf.Workflow.Branches["bar"] = branchConf.Branch + branchConf, err := processor.FromYAML(` +branch: + request_map: root = this + processors: + - bloblang: root = this + result_map: root = this +`) + require.NoError(t, err) mgr := newMockProcProvider(t, map[string]processor.Config{ "baz": branchConf, }) - _, err := 
pure.NewWorkflow(conf.Workflow, mgr) - require.EqualError(t, err, "processor resource 'foo' was not found") + _, err = mgr.NewProcessor(conf) + require.Error(t, err) + require.Contains(t, err.Error(), "processor resource 'foo' was not found") } type mockMsg struct { @@ -384,7 +414,7 @@ func TestWorkflows(t *testing.T) { }, { `root.bar = this.bar.not_null() - root.baz = this.baz.not_null()`, +root.baz = this.baz.not_null()`, "root = this", "root.buz = this.bar + this.baz", }, @@ -405,12 +435,12 @@ func TestWorkflows(t *testing.T) { { `root = this`, `root = this - root.name_upper = this.name.uppercase()`, +root.name_upper = this.name.uppercase()`, `root.result = if this.failme.bool(false) { - throw("this is a branch error") - } else { - this.name_upper - }`, + throw("this is a branch error") +} else { + this.name_upper +}`, }, }, input: []mockMsg{ @@ -431,20 +461,37 @@ func TestWorkflows(t *testing.T) { for i, test := range tests { test := test t.Run(strconv.Itoa(i), func(t *testing.T) { - conf := processor.NewConfig() - conf.Workflow.Order = test.order + if test.order == nil { + test.order = [][]string{} + } + confStr := fmt.Sprintf(` +workflow: + order: %v + branches: +`, gabs.Wrap(test.order).String()) + for j, mappings := range test.branches { - branchConf := processor.NewBranchConfig() - branchConf.RequestMap = mappings[0] - branchConf.ResultMap = mappings[2] - proc := processor.NewConfig() - proc.Type = "bloblang" - proc.Bloblang = mappings[1] - branchConf.Processors = append(branchConf.Processors, proc) - conf.Workflow.Branches[strconv.Itoa(j)] = branchConf + confStr += fmt.Sprintf(` + %v: + request_map: | + %v + processors: + - bloblang: | + %v + result_map: | + %v +`, + strconv.Itoa(j), + strings.ReplaceAll(mappings[0], "\n", "\n "), + strings.ReplaceAll(mappings[1], "\n", "\n "), + strings.ReplaceAll(mappings[2], "\n", "\n "), + ) } - p, err := pure.NewWorkflow(conf.Workflow, mock.NewManager()) + conf, err := processor.FromYAML(confStr) + require.NoError(t, 
err) + + p, err := mock.NewManager().NewProcessor(conf) require.NoError(t, err) inputMsg := message.QuickBatch(nil) @@ -634,14 +681,19 @@ func TestWorkflowsWithResources(t *testing.T) { for i, test := range tests { test := test t.Run(strconv.Itoa(i), func(t *testing.T) { - conf := processor.NewConfig() - conf.Workflow.BranchResources = []string{} + var branchNames []string for _, b := range test.branches { - conf.Workflow.BranchResources = append(conf.Workflow.BranchResources, b[0]) + branchNames = append(branchNames, b[0]) } - mgr := newMockProcProvider(t, quickTestBranches(test.branches...)) - p, err := pure.NewWorkflow(conf.Workflow, mgr) + conf, err := processor.FromYAML(fmt.Sprintf(` +workflow: + branch_resources: %v +`, gabs.Wrap(branchNames).String())) + require.NoError(t, err) + + mgr := newMockProcProvider(t, quickTestBranches(t, test.branches...)) + p, err := mgr.NewProcessor(conf) require.NoError(t, err) var parts [][]byte @@ -701,15 +753,20 @@ func TestWorkflowsParallel(t *testing.T) { `{"bar":5,"baz":10,"buz":12,"foo":"5","meta":{"workflow":{"succeeded":["0","1","2"]}}}`, } - conf := processor.NewConfig() - conf.Workflow.BranchResources = []string{} + var branchNames []string for _, b := range branches { - conf.Workflow.BranchResources = append(conf.Workflow.BranchResources, b[0]) + branchNames = append(branchNames, b[0]) } + conf, err := processor.FromYAML(fmt.Sprintf(` +workflow: + branch_resources: %v +`, gabs.Wrap(branchNames).String())) + require.NoError(t, err) + for loops := 0; loops < 10; loops++ { - mgr := newMockProcProvider(t, quickTestBranches(branches...)) - p, err := pure.NewWorkflow(conf.Workflow, mgr) + mgr := newMockProcProvider(t, quickTestBranches(t, branches...)) + p, err := mgr.NewProcessor(conf) require.NoError(t, err) startChan := make(chan struct{}) @@ -897,11 +954,17 @@ func TestWorkflowsWithOrderResources(t *testing.T) { for i, test := range tests { test := test t.Run(strconv.Itoa(i), func(t *testing.T) { - conf := 
processor.NewConfig() - conf.Workflow.Order = test.order + if test.order == nil { + test.order = [][]string{} + } + conf, err := processor.FromYAML(fmt.Sprintf(` +workflow: + order: %v +`, gabs.Wrap(test.order).String())) + require.NoError(t, err) - mgr := newMockProcProvider(t, quickTestBranches(test.branches...)) - p, err := pure.NewWorkflow(conf.Workflow, mgr) + mgr := newMockProcProvider(t, quickTestBranches(t, test.branches...)) + p, err := mgr.NewProcessor(conf) require.NoError(t, err) var parts [][]byte diff --git a/internal/impl/xml/processor.go b/internal/impl/xml/processor.go index 312d167410..d6a9f0f86a 100644 --- a/internal/impl/xml/processor.go +++ b/internal/impl/xml/processor.go @@ -4,47 +4,35 @@ import ( "context" "fmt" - "github.com/benthosdev/benthos/v4/internal/bundle" - "github.com/benthosdev/benthos/v4/internal/component/processor" - "github.com/benthosdev/benthos/v4/internal/docs" - "github.com/benthosdev/benthos/v4/internal/log" - "github.com/benthosdev/benthos/v4/internal/message" + "github.com/benthosdev/benthos/v4/public/service" +) + +const ( + pFieldOperator = "operator" + pFieldCast = "cast" ) func init() { - err := bundle.AllProcessors.Add(func(conf processor.Config, mgr bundle.NewManagement) (processor.V1, error) { - p, err := newXML(conf.XML, mgr) - if err != nil { - return nil, err - } - return processor.NewAutoObservedProcessor("xml", p, mgr), nil - }, docs.ComponentSpec{ - Name: "xml", - Status: docs.StatusBeta, - Categories: []string{ - "Parsing", - }, - Summary: ` -Parses messages as an XML document, performs a mutation on the data, and then -overwrites the previous contents with the new value.`, - Description: ` + err := service.RegisterProcessor( + "xml", service.NewConfigSpec(). + Categories("Parsing"). + Beta(). + Summary(`Parses messages as an XML document, performs a mutation on the data, and then overwrites the previous contents with the new value.`). 
+ Description(` ## Operators -### ` + "`to_json`" + ` +### `+"`to_json`"+` -Converts an XML document into a JSON structure, where elements appear as keys of -an object according to the following rules: +Converts an XML document into a JSON structure, where elements appear as keys of an object according to the following rules: -- If an element contains attributes they are parsed by prefixing a hyphen, - ` + "`-`" + `, to the attribute label. -- If the element is a simple element and has attributes, the element value - is given the key ` + "`#text`" + `. +- If an element contains attributes they are parsed by prefixing a hyphen, `+"`-`"+`, to the attribute label. +- If the element is a simple element and has attributes, the element value is given the key `+"`#text`"+`. - XML comments, directives, and process instructions are ignored. - When elements are repeated the resulting JSON value is an array. For example, given the following XML: -` + "```xml" + ` +`+"```xml"+` This is a title This is a description @@ -52,11 +40,11 @@ For example, given the following XML: foo2 foo3 -` + "```" + ` +`+"```"+` The resulting JSON structure would look like this: -` + "```json" + ` +`+"```json"+` { "root":{ "title":"This is a title", @@ -71,11 +59,11 @@ The resulting JSON structure would look like this: ] } } -` + "```" + ` +`+"```"+` With cast set to true, the resulting JSON structure would look like this: -` + "```json" + ` +`+"```json"+` { "root":{ "title":"This is a title", @@ -90,41 +78,62 @@ With cast set to true, the resulting JSON structure would look like this: ] } } -` + "```" + ``, - Config: docs.FieldComponent().WithChildren( - docs.FieldString("operator", "An XML [operation](#operators) to apply to messages.").HasOptions("to_json").HasDefault(""), - docs.FieldBool("cast", "Whether to try to cast values that are numbers and booleans to the right type. Default: all values are strings.").HasDefault(false), - ), - }) +`+"```"). 
+ Fields( + service.NewStringEnumField(pFieldOperator, "to_json"). + Description("An XML [operation](#operators) to apply to messages."). + Default(""), + service.NewBoolField(pFieldCast). + Description("Whether to try to cast values that are numbers and booleans to the right type. Default: all values are strings."). + Default(false), + ), + func(conf *service.ParsedConfig, mgr *service.Resources) (service.Processor, error) { + op, err := conf.FieldString(pFieldOperator) + if err != nil { + return nil, err + } + + cast, err := conf.FieldBool(pFieldCast) + if err != nil { + return nil, err + } + + return newXML(op, cast, mgr) + }) if err != nil { panic(err) } } type xmlProc struct { - log log.Modular + log *service.Logger cast bool } -func newXML(conf processor.XMLConfig, mgr bundle.NewManagement) (*xmlProc, error) { - if conf.Operator != "to_json" { - return nil, fmt.Errorf("operator not recognised: %v", conf.Operator) +func newXML(operator string, cast bool, mgr *service.Resources) (*xmlProc, error) { + if operator != "to_json" { + return nil, fmt.Errorf("operator not recognised: %v", operator) } j := &xmlProc{ log: mgr.Logger(), - cast: conf.Cast, + cast: cast, } return j, nil } -func (p *xmlProc) Process(ctx context.Context, msg *message.Part) ([]*message.Part, error) { - root, err := ToMap(msg.AsBytes(), p.cast) +func (p *xmlProc) Process(ctx context.Context, msg *service.Message) (service.MessageBatch, error) { + mBytes, err := msg.AsBytes() + if err != nil { + return nil, err + } + + root, err := ToMap(mBytes, p.cast) if err != nil { p.log.Debugf("Failed to parse part as XML: %v", err) return nil, err } msg.SetStructuredMut(root) - return []*message.Part{msg}, nil + return service.MessageBatch{msg}, nil } func (p *xmlProc) Close(ctx context.Context) error { diff --git a/internal/impl/xml/processor_test.go b/internal/impl/xml/processor_test.go index 30d7a14e2f..7088158546 100644 --- a/internal/impl/xml/processor_test.go +++ 
b/internal/impl/xml/processor_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/manager/mock" @@ -92,13 +93,15 @@ func TestXMLCases(t *testing.T) { }, } - conf := processor.NewConfig() - conf.Type = "xml" - conf.XML.Operator = "to_json" + conf, err := processor.FromYAML(` +xml: + operator: to_json +`) + require.NoError(t, err) + proc, err := mock.NewManager().NewProcessor(conf) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) + for _, test := range tests { t.Run(test.name, func(tt *testing.T) { msgsOut, res := proc.ProcessBatch(context.Background(), message.QuickBatch([][]byte{[]byte(test.input)})) @@ -117,17 +120,17 @@ func TestXMLCases(t *testing.T) { } func TestXMLWithCast(t *testing.T) { - conf := processor.NewConfig() - conf.Type = "xml" - conf.XML.Cast = true - conf.XML.Operator = "to_json" + conf, err := processor.FromYAML(` +xml: + operator: to_json + cast: true +`) + require.NoError(t, err) testString := `This is a title123True` proc, err := mock.NewManager().NewProcessor(conf) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) msgsOut, res := proc.ProcessBatch(context.Background(), message.QuickBatch([][]byte{[]byte(testString)})) if res != nil { diff --git a/internal/manager/type_test.go b/internal/manager/type_test.go index 4edec01fca..37410174e5 100644 --- a/internal/manager/type_test.go +++ b/internal/manager/type_test.go @@ -37,7 +37,7 @@ func TestManagerProcessorLabels(t *testing.T) { for _, l := range goodLabels { conf := processor.NewConfig() conf.Type = "bloblang" - conf.Bloblang = "root = this" + conf.Plugin = "root = this" conf.Label = l mgr, err := manager.New(manager.NewResourceConfig()) @@ -56,7 +56,7 @@ func TestManagerProcessorLabels(t *testing.T) { for _, l := range badLabels { conf := processor.NewConfig() conf.Type = "bloblang" - 
conf.Bloblang = "root = this" + conf.Plugin = "root = this" conf.Label = l mgr, err := manager.New(manager.NewResourceConfig()) diff --git a/internal/serverless/handler_test.go b/internal/serverless/handler_test.go index e54f3bb328..c77ce6d244 100644 --- a/internal/serverless/handler_test.go +++ b/internal/serverless/handler_test.go @@ -12,7 +12,6 @@ import ( "time" "github.com/stretchr/testify/require" - yaml "gopkg.in/yaml.v3" "github.com/benthosdev/benthos/v4/internal/component/output" "github.com/benthosdev/benthos/v4/internal/component/processor" @@ -22,13 +21,6 @@ import ( _ "github.com/benthosdev/benthos/v4/public/components/pure" ) -func parseYAMLOutputConf(t testing.TB, formatStr string, args ...any) (conf output.Config) { - t.Helper() - conf = output.NewConfig() - require.NoError(t, yaml.Unmarshal(fmt.Appendf(nil, formatStr, args...), &conf)) - return -} - func TestHandlerAsync(t *testing.T) { var results [][]byte var resMut sync.Mutex @@ -46,11 +38,13 @@ func TestHandlerAsync(t *testing.T) { })) defer ts.Close() + var err error conf := config.New() - conf.Output = parseYAMLOutputConf(t, ` + conf.Output, err = output.FromYAML(fmt.Sprintf(` http_client: url: %v -`, ts.URL) +`, ts.URL)) + require.NoError(t, err) h, err := NewHandler(conf) if err != nil { @@ -99,9 +93,11 @@ func TestHandlerSyncBatch(t *testing.T) { conf := config.New() conf.Output.Type = ServerlessResponseType - pConf := processor.NewConfig() - pConf.Type = "select_parts" - pConf.SelectParts.Parts = []int{0, 0, 0} + pConf, err := processor.FromYAML(` +select_parts: + parts: [ 0, 0, 0 ] +`) + require.NoError(t, err) conf.Pipeline.Processors = append(conf.Pipeline.Processors, pConf) @@ -131,9 +127,11 @@ func TestHandlerSyncBatches(t *testing.T) { conf := config.New() conf.Output.Type = ServerlessResponseType - pConf := processor.NewConfig() - pConf.Type = "select_parts" - pConf.SelectParts.Parts = []int{0, 0, 0} + pConf, err := processor.FromYAML(` +select_parts: + parts: [ 0, 0, 0 ] +`) + 
require.NoError(t, err) conf.Pipeline.Processors = append(conf.Pipeline.Processors, pConf) diff --git a/public/service/config.go b/public/service/config.go index df9af0ee7c..1d4f774dc1 100644 --- a/public/service/config.go +++ b/public/service/config.go @@ -100,6 +100,14 @@ func NewStringListField(name string) *ConfigField { } } +// NewStringListOfListsField describes a new config field consisting of a list +// of lists of strings (a 2D array of strings). +func NewStringListOfListsField(name string) *ConfigField { + return &ConfigField{ + field: docs.FieldString(name, "").ArrayOfArrays(), + } +} + // NewStringMapField describes a new config field consisting of an object of // arbitrary keys with string values. func NewStringMapField(name string) *ConfigField { @@ -182,6 +190,15 @@ func NewObjectListField(name string, fields ...*ConfigField) *ConfigField { } } +// NewObjectMapField describes a new map type config field consisting of +// objects with one or more child fields. +func NewObjectMapField(name string, fields ...*ConfigField) *ConfigField { + objField := NewObjectField(name, fields...) + return &ConfigField{ + field: objField.field.Map(), + } +} + // NewInternalField returns a ConfigField derived from an internal package field // spec. This function is for internal use only. func NewInternalField(ifield docs.FieldSpec) *ConfigField { @@ -716,6 +733,39 @@ func (p *ParsedConfig) FieldStringList(path ...string) ([]string, error) { return sList, nil } +// FieldStringListOfLists accesses a field that is a list of lists of strings +// from the parsed config by its name and returns the value. Returns an error if +// the field is not found, or is not a list of lists of strings. +func (p *ParsedConfig) FieldStringListOfLists(path ...string) ([][]string, error) { + v, exists := p.field(path...) 
+ if !exists { + return nil, fmt.Errorf("field '%v' was not found in the config", p.fullDotPath(path...)) + } + iList, ok := v.([]any) + if !ok { + if sList, ok := v.([][]string); ok { + return sList, nil + } + return nil, fmt.Errorf("expected field '%v' to be a list of string lists, got %T", p.fullDotPath(path...), v) + } + sList := make([][]string, len(iList)) + for i, ev := range iList { + switch t := ev.(type) { + case []string: + sList[i] = t + case []any: + tmpList := make([]string, len(t)) + for j, evv := range t { + if tmpList[j], ok = evv.(string); !ok { + return nil, fmt.Errorf("expected field '%v' to be a string list, found an element of type %T", p.fullDotPath(path...), evv) + } + } + sList[i] = tmpList + } + } + return sList, nil +} + // FieldStringMap accesses a field that is an object of arbitrary keys and // string values from the parsed config by its name and returns the value. // Returns an error if the field is not found, or is not an object of strings. @@ -908,3 +958,26 @@ func (p *ParsedConfig) FieldObjectList(path ...string) ([]*ParsedConfig, error) } return sList, nil } + +// FieldObjectMap accesses a field that is a map of objects from the parsed +// config by its name and returns the value as a map of *ParsedConfig types, +// where each one represents an object in the map. Returns an error if the +// field is not found, or is not a map of objects. +func (p *ParsedConfig) FieldObjectMap(path ...string) (map[string]*ParsedConfig, error) { + v, exists := p.field(path...) 
+ if !exists { + return nil, fmt.Errorf("field '%v' was not found in the config", p.fullDotPath(path...)) + } + iMap, ok := v.(map[string]any) + if !ok { + return nil, fmt.Errorf("expected field '%v' to be a map, got %T", p.fullDotPath(path...), v) + } + sMap := make(map[string]*ParsedConfig, len(iMap)) + for i, ev := range iMap { + sMap[i] = &ParsedConfig{ + mgr: p.mgr, + generic: ev, + } + } + return sMap, nil +} diff --git a/public/service/config_batch_policy_test.go b/public/service/config_batch_policy_test.go index b6a07a086d..19e2008d3a 100644 --- a/public/service/config_batch_policy_test.go +++ b/public/service/config_batch_policy_test.go @@ -34,7 +34,6 @@ a: assert.Equal(t, "5s", bConf.Period) require.Len(t, bConf.procs, 1) assert.Equal(t, "bloblang", bConf.procs[0].Type) - assert.Equal(t, "root = content().uppercase()", bConf.procs[0].Bloblang) } func TestBatcherPeriod(t *testing.T) { diff --git a/public/service/config_input.go b/public/service/config_input.go index 17c29d0592..42717304df 100644 --- a/public/service/config_input.go +++ b/public/service/config_input.go @@ -5,8 +5,6 @@ import ( "strconv" "strings" - "gopkg.in/yaml.v3" - "github.com/benthosdev/benthos/v4/internal/component/input" "github.com/benthosdev/benthos/v4/internal/docs" ) @@ -19,18 +17,6 @@ func NewInputField(name string) *ConfigField { } } -func inputConfFromAny(v any) (conf input.Config, err error) { - switch t := v.(type) { - case *yaml.Node: - err = t.Decode(&conf) - case input.Config: - conf = t - default: - err = fmt.Errorf("unexpected value, expected object, got %T", v) - } - return -} - // FieldInput accesses a field from a parsed config that was defined with // NewInputField and returns an OwnedInput, or an error if the configuration was // invalid. 
@@ -40,7 +26,7 @@ func (p *ParsedConfig) FieldInput(path ...string) (*OwnedInput, error) { return nil, fmt.Errorf("field '%v' was not found in the config", strings.Join(path, ".")) } - conf, err := inputConfFromAny(field) + conf, err := input.FromAny(p.mgr.Environment(), field) if err != nil { return nil, err } @@ -77,7 +63,7 @@ func (p *ParsedConfig) FieldInputList(path ...string) ([]*OwnedInput, error) { var configs []input.Config for i, iConf := range fieldArray { - conf, err := inputConfFromAny(iConf) + conf, err := input.FromAny(p.mgr.Environment(), iConf) if err != nil { return nil, fmt.Errorf("value %v: %w", i, err) } @@ -123,7 +109,7 @@ func (p *ParsedConfig) FieldInputMap(path ...string) (map[string]*OwnedInput, er tmpMgr := p.mgr.IntoPath(path...) ins := make(map[string]*OwnedInput, len(fieldMap)) for k, v := range fieldMap { - conf, err := inputConfFromAny(v) + conf, err := input.FromAny(p.mgr.Environment(), v) if err != nil { return nil, fmt.Errorf("value %v: %w", k, err) } diff --git a/public/service/config_output.go b/public/service/config_output.go index ce6f89865f..6b0b2f50e4 100644 --- a/public/service/config_output.go +++ b/public/service/config_output.go @@ -5,8 +5,6 @@ import ( "strconv" "strings" - "gopkg.in/yaml.v3" - "github.com/benthosdev/benthos/v4/internal/component/output" "github.com/benthosdev/benthos/v4/internal/docs" ) @@ -19,18 +17,6 @@ func NewOutputField(name string) *ConfigField { } } -func outputConfFromAny(v any) (conf output.Config, err error) { - switch t := v.(type) { - case *yaml.Node: - err = t.Decode(&conf) - case output.Config: - conf = t - default: - err = fmt.Errorf("unexpected value, expected object, got %T", v) - } - return -} - // FieldOutput accesses a field from a parsed config that was defined with // NewOutputField and returns an OwnedOutput, or an error if the configuration // was invalid. 
@@ -40,7 +26,7 @@ func (p *ParsedConfig) FieldOutput(path ...string) (*OwnedOutput, error) { return nil, fmt.Errorf("field '%v' was not found in the config", strings.Join(path, ".")) } - conf, err := outputConfFromAny(field) + conf, err := output.FromAny(p.mgr.Environment(), field) if err != nil { return nil, err } @@ -77,7 +63,7 @@ func (p *ParsedConfig) FieldOutputList(path ...string) ([]*OwnedOutput, error) { var configs []output.Config for i, iConf := range fieldArray { - conf, err := outputConfFromAny(iConf) + conf, err := output.FromAny(p.mgr.Environment(), iConf) if err != nil { return nil, fmt.Errorf("value %v: %w", i, err) } @@ -125,7 +111,7 @@ func (p *ParsedConfig) FieldOutputMap(path ...string) (map[string]*OwnedOutput, tmpMgr := p.mgr.IntoPath(path...) outs := make(map[string]*OwnedOutput, len(fieldMap)) for k, v := range fieldMap { - conf, err := outputConfFromAny(v) + conf, err := output.FromAny(p.mgr.Environment(), v) if err != nil { return nil, fmt.Errorf("value %v: %w", k, err) } diff --git a/public/service/config_processor.go b/public/service/config_processor.go index 49230a163f..3219be3f1a 100644 --- a/public/service/config_processor.go +++ b/public/service/config_processor.go @@ -5,8 +5,6 @@ import ( "strconv" "strings" - "gopkg.in/yaml.v3" - "github.com/benthosdev/benthos/v4/internal/component/processor" "github.com/benthosdev/benthos/v4/internal/docs" ) @@ -20,18 +18,6 @@ func NewProcessorField(name string) *ConfigField { } } -func procConfFromAny(v any) (conf processor.Config, err error) { - switch t := v.(type) { - case *yaml.Node: - err = t.Decode(&conf) - case processor.Config: - conf = t - default: - err = fmt.Errorf("unexpected value, expected object, got %T", v) - } - return -} - // FieldProcessor accesses a field from a parsed config that was defined with // NewProcessorField and returns an OwnedProcessor, or an error if the // configuration was invalid. 
@@ -41,7 +27,7 @@ func (p *ParsedConfig) FieldProcessor(path ...string) (*OwnedProcessor, error) { return nil, fmt.Errorf("field '%v' was not found in the config", strings.Join(path, ".")) } - conf, err := procConfFromAny(v) + conf, err := processor.FromAny(p.mgr.Environment(), v) if err != nil { return nil, err } @@ -75,7 +61,7 @@ func (p *ParsedConfig) fieldProcessorListConfigs(path ...string) ([]processor.Co var procConfigs []processor.Config for i, iConf := range procsArray { - pconf, err := procConfFromAny(iConf) + pconf, err := processor.FromAny(p.mgr.Environment(), iConf) if err != nil { return nil, fmt.Errorf("value %v: %w", i, err) } diff --git a/public/service/config_scanner.go b/public/service/config_scanner.go index f6c61ec342..a63d830c87 100644 --- a/public/service/config_scanner.go +++ b/public/service/config_scanner.go @@ -19,7 +19,7 @@ func NewScannerField(name string) *ConfigField { } func ownedScannerCreatorFromConfAny(mgr bundle.NewManagement, field any) (*OwnedScannerCreator, error) { - pluginConf, err := scanner.FromAny(mgr.Environment(), docs.TypeScanner, field) + pluginConf, err := scanner.FromAny(mgr.Environment(), field) if err != nil { return nil, err } diff --git a/public/service/config_util.go b/public/service/config_util.go index 345beb4772..6b871e5be8 100644 --- a/public/service/config_util.go +++ b/public/service/config_util.go @@ -1,8 +1,6 @@ package service import ( - "gopkg.in/yaml.v3" - "github.com/benthosdev/benthos/v4/internal/bundle" "github.com/benthosdev/benthos/v4/internal/docs" ) @@ -24,7 +22,7 @@ func extractConfig( nm bundle.NewManagement, spec *ConfigSpec, componentName string, - pluginConfig, componentConfig any, + pluginConfig any, ) (*ParsedConfig, error) { // All nested fields are under the namespace of the component type, and // therefore we need to namespace the manager such that metrics and logs @@ -33,27 +31,12 @@ func extractConfig( nm = nm.IntoPath(componentName) } - if pluginConfig != nil { - return 
spec.configFromAny(nm, pluginConfig) - } - - // TODO: V4 We won't need the below fallback once it's not possible to - // instantiate components in code with NewConfig() - var n yaml.Node - if err := n.Encode(componentConfig); err != nil { - return nil, err - } - - componentsMap := map[string]yaml.Node{} - if err := n.Decode(&componentsMap); err != nil { - return nil, err - } - - pluginNode, exists := componentsMap[componentName] - if !exists { - pluginNode = yaml.Node{} - _ = pluginNode.Encode(nil) + if pluginConfig == nil { + if spec.component.Config.Default != nil { + pluginConfig = *spec.component.Config.Default + } else if len(spec.component.Config.Children) > 0 { + pluginConfig = map[string]any{} + } } - - return spec.configFromNode(nm, &pluginNode) + return spec.configFromAny(nm, pluginConfig) } diff --git a/public/service/config_util_test.go b/public/service/config_util_test.go deleted file mode 100644 index cd3733e3d6..0000000000 --- a/public/service/config_util_test.go +++ /dev/null @@ -1,26 +0,0 @@ -package service - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/benthosdev/benthos/v4/internal/component/processor" -) - -func TestConfigDeprecatedExraction(t *testing.T) { - oldConf := processor.NewConfig() - oldConf.Type = "insert_part" - oldConf.InsertPart.Index = 3 - - spec := NewConfigSpec(). 
- Field(NewIntField("index")) - - pConf, err := extractConfig(nil, spec, "insert_part", nil, oldConf) - require.NoError(t, err) - - v, err := pConf.FieldInt("index") - require.NoError(t, err) - assert.Equal(t, 3, v) -} diff --git a/public/service/environment.go b/public/service/environment.go index 2462644f98..2c905facd5 100644 --- a/public/service/environment.go +++ b/public/service/environment.go @@ -124,7 +124,7 @@ func (e *Environment) RegisterBatchBuffer(name string, spec *ConfigSpec, ctor Ba componentSpec.Name = name componentSpec.Type = docs.TypeBuffer return e.internal.BufferAdd(func(conf buffer.Config, nm bundle.NewManagement) (buffer.Streamed, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -155,7 +155,7 @@ func (e *Environment) RegisterCache(name string, spec *ConfigSpec, ctor CacheCon componentSpec.Name = name componentSpec.Type = docs.TypeCache return e.internal.CacheAdd(func(conf cache.Config, nm bundle.NewManagement) (cache.V1, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -190,7 +190,7 @@ func (e *Environment) RegisterInput(name string, spec *ConfigSpec, ctor InputCon componentSpec.Name = name componentSpec.Type = docs.TypeInput return e.internal.InputAdd(iprocessors.WrapConstructor(func(conf input.Config, nm bundle.NewManagement) (input.Streamed, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -217,7 +217,7 @@ func (e *Environment) RegisterBatchInput(name string, spec *ConfigSpec, ctor Bat componentSpec.Name = name componentSpec.Type = docs.TypeInput return e.internal.InputAdd(iprocessors.WrapConstructor(func(conf input.Config, nm bundle.NewManagement) 
(input.Streamed, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -255,7 +255,7 @@ func (e *Environment) RegisterOutput(name string, spec *ConfigSpec, ctor OutputC componentSpec.Type = docs.TypeOutput return e.internal.OutputAdd(oprocessors.WrapConstructor( func(conf output.Config, nm bundle.NewManagement) (output.Streamed, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -294,7 +294,7 @@ func (e *Environment) RegisterBatchOutput(name string, spec *ConfigSpec, ctor Ba componentSpec.Type = docs.TypeOutput return e.internal.OutputAdd(oprocessors.WrapConstructor( func(conf output.Config, nm bundle.NewManagement) (output.Streamed, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -344,7 +344,7 @@ func (e *Environment) RegisterProcessor(name string, spec *ConfigSpec, ctor Proc componentSpec.Name = name componentSpec.Type = docs.TypeProcessor return e.internal.ProcessorAdd(func(conf processor.Config, nm bundle.NewManagement) (processor.V1, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -369,7 +369,7 @@ func (e *Environment) RegisterBatchProcessor(name string, spec *ConfigSpec, ctor componentSpec.Name = name componentSpec.Type = docs.TypeProcessor return e.internal.ProcessorAdd(func(conf processor.Config, nm bundle.NewManagement) (processor.V1, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -405,7 +405,7 @@ func (e 
*Environment) RegisterRateLimit(name string, spec *ConfigSpec, ctor Rate componentSpec.Name = name componentSpec.Type = docs.TypeRateLimit return e.internal.RateLimitAdd(func(conf ratelimit.Config, nm bundle.NewManagement) (ratelimit.V1, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -435,7 +435,7 @@ func (e *Environment) RegisterMetricsExporter(name string, spec *ConfigSpec, cto componentSpec.Name = name componentSpec.Type = docs.TypeMetrics return e.internal.MetricsAdd(func(conf metrics.Config, nm bundle.NewManagement) (metrics.Type, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -471,7 +471,7 @@ func (e *Environment) RegisterOtelTracerProvider(name string, spec *ConfigSpec, componentSpec.Name = name componentSpec.Type = docs.TypeTracer return e.internal.TracersAdd(func(conf tracer.Config, nm bundle.NewManagement) (trace.TracerProvider, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } @@ -503,7 +503,7 @@ func (e *Environment) RegisterBatchScannerCreator(name string, spec *ConfigSpec, componentSpec.Name = name componentSpec.Type = docs.TypeScanner return e.internal.ScannerAdd(func(conf scanner.Config, nm bundle.NewManagement) (scanner.Creator, error) { - pluginConf, err := extractConfig(nm, spec, name, conf.Plugin, conf) + pluginConf, err := extractConfig(nm, spec, name, conf.Plugin) if err != nil { return nil, err } diff --git a/public/service/processor.go b/public/service/processor.go index 59b6d86a71..aa78f0f25f 100644 --- a/public/service/processor.go +++ b/public/service/processor.go @@ -219,3 +219,16 @@ func ExecuteProcessors(ctx context.Context, processors 
[]*OwnedProcessor, inbatc return ExecuteProcessors(ctx, processors[1:], nextBatches...) } + +type processorUnwrapper struct { + p processor.V1 +} + +func (w processorUnwrapper) Unwrap() processor.V1 { + return w.p +} + +// XUnwrapper is for internal use only, do not use this. +func (o *OwnedProcessor) XUnwrapper() any { + return processorUnwrapper{p: o.p} +} diff --git a/public/service/stream_builder_test.go b/public/service/stream_builder_test.go index a0193beae1..dcc57b3f50 100644 --- a/public/service/stream_builder_test.go +++ b/public/service/stream_builder_test.go @@ -414,8 +414,7 @@ type: local`)) bloblang: ""`, ` - label: "" - jmespath: - query: ""`, + jmespath: {}`, `output: label: "" drop:`, @@ -948,8 +947,7 @@ output: bloblang: ""`, ` - label: "" - jmespath: - query: ""`, + jmespath: {}`, `output: label: "" drop:`, @@ -1185,3 +1183,139 @@ logger: require.NoError(b, strm.Run(context.Background())) } } + +func TestStreamBuilderLargeNestingSmoke(t *testing.T) { + b := service.NewStreamBuilder() + require.NoError(t, b.SetYAML(` +input: + label: ibroker0 + broker: + inputs: + - label: ifoo + generate: + count: 1 + interval: 1ms + mapping: 'root = "ifoo: " + counter().string()' + processors: + - mutation: 'root = content().string() + " pfoo0"' + - mutation: 'root = content().string() + " pfoo1"' + - label: ibroker1 + broker: + inputs: + - label: ibar + generate: + count: 1 + interval: 1ms + mapping: 'root = "ibar: " + counter().string()' + processors: + - mutation: 'root = content().string() + " pbar0"' + - label: ibaz + generate: + count: 1 + interval: 1ms + mapping: 'root = "ibaz: " + counter().string()' + processors: + - mutation: 'root = content().string() + " pbaz0"' + processors: + - mutation: 'root = content().string() + " pibroker10"' + processors: + - mutation: 'root = content().string() + " pibroker00"' + +pipeline: + processors: + - try: + - mutation: 'root = content().string() + " pquack0"' + - for_each: + - mutation: 'root = content().string() + " 
pwoof0"' + +output: + label: obroker0 + broker: + outputs: + - label: ofoo + drop: {} + processors: + - mutation: 'root = content().string() + " pofoo0"' + - label: obroker1 + broker: + outputs: + - label: obar + drop: {} + - label: obaz + drop: {} + processors: + - mutation: 'root = content().string() + " pobaz0"' + processors: + - mutation: 'root = content().string() + " pobroker10"' + processors: + - mutation: 'root = content().string() + " pobroker00"' +`)) + + strm, tracSum, err := b.BuildTraced() + require.NoError(t, err) + + tCtx, done := context.WithTimeout(context.Background(), time.Second) + defer done() + require.NoError(t, strm.Run(tCtx)) + + eventKeys := map[string]map[string]struct{}{} + for k, v := range tracSum.InputEvents() { + eMap := map[string]struct{}{} + for _, e := range v { + eMap[e.Content] = struct{}{} + } + eventKeys[k] = eMap + } + + assert.Equal(t, map[string]map[string]struct{}{ + "ifoo": {"ifoo: 1 pfoo0 pfoo1": struct{}{}}, + "ibar": {"ibar: 1 pbar0": struct{}{}}, + "ibaz": {"ibaz: 1 pbaz0": struct{}{}}, + "ibroker0": { + "ifoo: 1 pfoo0 pfoo1 pibroker00": struct{}{}, + "ibar: 1 pbar0 pibroker10 pibroker00": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00": struct{}{}, + }, + "ibroker1": { + "ibar: 1 pbar0 pibroker10": struct{}{}, + "ibaz: 1 pbaz0 pibroker10": struct{}{}, + }, + }, eventKeys) + + eventKeys = map[string]map[string]struct{}{} + for k, v := range tracSum.OutputEvents() { + eMap := map[string]struct{}{} + for _, e := range v { + eMap[e.Content] = struct{}{} + } + eventKeys[k] = eMap + } + + assert.Equal(t, map[string]map[string]struct{}{ + "ofoo": { + "ifoo: 1 pfoo0 pfoo1 pibroker00 pquack0 pwoof0 pobroker00 pofoo0": struct{}{}, + "ibar: 1 pbar0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pofoo0": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pofoo0": struct{}{}, + }, + "obar": { + "ifoo: 1 pfoo0 pfoo1 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + "ibar: 1 pbar0 
pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + }, + "obaz": { + "ifoo: 1 pfoo0 pfoo1 pibroker00 pquack0 pwoof0 pobroker00 pobroker10 pobaz0": struct{}{}, + "ibar: 1 pbar0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10 pobaz0": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10 pobaz0": struct{}{}, + }, + "obroker0": { + "ifoo: 1 pfoo0 pfoo1 pibroker00 pquack0 pwoof0 pobroker00": struct{}{}, + "ibar: 1 pbar0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00": struct{}{}, + }, + "obroker1": { + "ifoo: 1 pfoo0 pfoo1 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + "ibar: 1 pbar0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + "ibaz: 1 pbaz0 pibroker10 pibroker00 pquack0 pwoof0 pobroker00 pobroker10": struct{}{}, + }, + }, eventKeys) +} diff --git a/website/docs/components/processors/bloblang.md b/website/docs/components/processors/bloblang.md index e7b895e121..eaedc39adb 100644 --- a/website/docs/components/processors/bloblang.md +++ b/website/docs/components/processors/bloblang.md @@ -14,7 +14,6 @@ categories: ["Mapping","Parsing"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Executes a [Bloblang](/docs/guides/bloblang/about) mapping on messages. ```yml @@ -119,11 +118,8 @@ pipeline: ## Error Handling -Bloblang mappings can fail, in which case the message remains unchanged, errors -are logged, and the message is flagged as having failed, allowing you to use +Bloblang mappings can fail, in which case the message remains unchanged, errors are logged, and the message is flagged as having failed, allowing you to use [standard processor error handling patterns](/docs/configuration/error_handling). 
-However, Bloblang itself also provides powerful ways of ensuring your mappings -do not fail by specifying desired fallback behaviour, which you can read about -[in this section](/docs/guides/bloblang/about#error-handling). +However, Bloblang itself also provides powerful ways of ensuring your mappings do not fail by specifying desired fallback behaviour, which you can read about [in this section](/docs/guides/bloblang/about#error-handling). diff --git a/website/docs/components/processors/bounds_check.md b/website/docs/components/processors/bounds_check.md index bd2210a2ae..cb0eebcc32 100644 --- a/website/docs/components/processors/bounds_check.md +++ b/website/docs/components/processors/bounds_check.md @@ -14,7 +14,6 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Removes messages (and batches) that do not fit within certain size boundaries. diff --git a/website/docs/components/processors/branch.md b/website/docs/components/processors/branch.md index dfebc247a8..a449f6b2b6 100644 --- a/website/docs/components/processors/branch.md +++ b/website/docs/components/processors/branch.md @@ -14,46 +14,30 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -The `branch` processor allows you to create a new request message via -a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors -on the request messages, and, finally, map the result back into the source -message using another mapping. +The `branch` processor allows you to create a new request message via a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors on the request messages, and, finally, map the result back into the source message using another mapping. 
```yml # Config fields, showing default values label: "" branch: request_map: "" - processors: [] + processors: [] # No default (required) result_map: "" ``` -This is useful for preserving the original message contents when using -processors that would otherwise replace the entire contents. +This is useful for preserving the original message contents when using processors that would otherwise replace the entire contents. ### Metadata -Metadata fields that are added to messages during branch processing will not be -automatically copied into the resulting message. In order to do this you should -explicitly declare in your `result_map` either a wholesale copy with -`meta = meta()`, or selective copies with -`meta foo = meta("bar")` and so on. +Metadata fields that are added to messages during branch processing will not be automatically copied into the resulting message. In order to do this you should explicitly declare in your `result_map` either a wholesale copy with `meta = meta()`, or selective copies with `meta foo = meta("bar")` and so on. ### Error Handling -If the `request_map` fails the child processors will not be executed. -If the child processors themselves result in an (uncaught) error then the -`result_map` will not be executed. If the `result_map` fails -the message will remain unchanged. Under any of these conditions standard -[error handling methods](/docs/configuration/error_handling) can be used in -order to filter, DLQ or recover the failed messages. +If the `request_map` fails the child processors will not be executed. If the child processors themselves result in an (uncaught) error then the `result_map` will not be executed. If the `result_map` fails the message will remain unchanged. Under any of these conditions standard [error handling methods](/docs/configuration/error_handling) can be used in order to filter, DLQ or recover the failed messages. 
### Conditional Branching -If the root of your request map is set to `deleted()` then the branch -processors are skipped for the given message, this allows you to conditionally -branch messages. +If the root of your request map is set to `deleted()` then the branch processors are skipped for the given message, this allows you to conditionally branch messages. ## Fields @@ -88,7 +72,6 @@ A list of processors to apply to mapped requests. When processing message batche Type: `array` -Default: `[]` ### `result_map` @@ -132,9 +115,7 @@ result_map: |- -This example strips the request message into an empty body, grabs an HTTP -payload, and places the result back into the original message at the path -`image.pull_count`: +This example strips the request message into an empty body, grabs an HTTP payload, and places the result back into the original message at the path `image.pull_count`: ```yaml pipeline: @@ -177,9 +158,7 @@ pipeline: -This example maps a new payload for triggering a lambda function with an ID and -username from the original message, and the result of the lambda is discarded, -meaning the original message is unchanged. +This example maps a new payload for triggering a lambda function with an ID and username from the original message, and the result of the lambda is discarded, meaning the original message is unchanged. 
```yaml pipeline: @@ -198,8 +177,7 @@ pipeline: -This example caches a document by a message ID only when the type of the -document is a foo: +This example caches a document by a message ID only when the type of the document is a foo: ```yaml pipeline: diff --git a/website/docs/components/processors/cache.md b/website/docs/components/processors/cache.md index 240ed84fd6..ade4b90e57 100644 --- a/website/docs/components/processors/cache.md +++ b/website/docs/components/processors/cache.md @@ -14,7 +14,6 @@ categories: ["Integration"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Performs operations against a [cache resource](/docs/components/caches/about) for each message, allowing you to store or retrieve data within message payloads. @@ -29,10 +28,10 @@ Performs operations against a [cache resource](/docs/components/caches/about) fo # Common config fields, showing default values label: "" cache: - resource: "" - operator: "" - key: "" - value: "" + resource: "" # No default (required) + operator: "" # No default (required) + key: "" # No default (required) + value: "" # No default (optional) ``` @@ -42,11 +41,11 @@ cache: # All config fields, showing default values label: "" cache: - resource: "" - operator: "" - key: "" - value: "" - ttl: "" + resource: "" # No default (required) + operator: "" # No default (required) + key: "" # No default (required) + value: "" # No default (optional) + ttl: 60s # No default (optional) ``` @@ -67,10 +66,7 @@ This processor will interpolate functions within the `key` and `value` fields in -Deduplication can be done using the add operator with a key extracted from the -message payload, since it fails when a key already exists we can remove the -duplicates using a -[`mapping` processor](/docs/components/processors/mapping): +Deduplication can be done using the add operator with a key extracted from the message payload, since it fails when a key already exists we can remove the duplicates using a [`mapping` 
processor](/docs/components/processors/mapping): ```yaml pipeline: @@ -105,6 +101,7 @@ pipeline: } else { deleted() } processors: - cache: + resource: foocache operator: add key: ${! content() } value: t @@ -119,8 +116,7 @@ pipeline: -It's possible to enrich payloads with content previously stored in a cache by -using the [`branch`](/docs/components/processors/branch) processor: +It's possible to enrich payloads with content previously stored in a cache by using the [`branch`](/docs/components/processors/branch) processor: ```yaml pipeline: @@ -154,7 +150,6 @@ The [`cache` resource](/docs/components/caches/about) to target with this proces Type: `string` -Default: `""` ### `operator` @@ -162,7 +157,6 @@ The [operation](#operators) to perform with the cache. Type: `string` -Default: `""` Options: `set`, `add`, `get`, `delete`. ### `key` @@ -172,7 +166,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` ### `value` @@ -181,7 +174,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` ### `ttl` @@ -190,7 +182,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` Requires version 3.33.0 or newer ```yml diff --git a/website/docs/components/processors/catch.md b/website/docs/components/processors/catch.md index 360e6943bc..965eed0b34 100644 --- a/website/docs/components/processors/catch.md +++ b/website/docs/components/processors/catch.md @@ -14,20 +14,15 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Applies a list of child processors _only_ when a previous processing step has -failed. +Applies a list of child processors _only_ when a previous processing step has failed. 
```yml # Config fields, showing default values label: "" -catch: [] # No default (required) +catch: [] ``` -Behaves similarly to the [`for_each`](/docs/components/processors/for_each) processor, where a -list of child processors are applied to individual messages of a batch. However, -processors are only applied to messages that failed a processing step prior to -the catch. +Behaves similarly to the [`for_each`](/docs/components/processors/for_each) processor, where a list of child processors are applied to individual messages of a batch. However, processors are only applied to messages that failed a processing step prior to the catch. For example, with the following config: @@ -40,13 +35,9 @@ pipeline: - resource: baz ``` -If the processor `foo` fails for a particular message, that message -will be fed into the processors `bar` and `baz`. Messages that do not -fail for the processor `foo` will skip these processors. +If the processor `foo` fails for a particular message, that message will be fed into the processors `bar` and `baz`. Messages that do not fail for the processor `foo` will skip these processors. -When messages leave the catch block their fail flags are cleared. This processor -is useful for when it's possible to recover failed messages, or when special -actions (such as logging/metrics) are required before dropping them. +When messages leave the catch block their fail flags are cleared. This processor is useful for when it's possible to recover failed messages, or when special actions (such as logging/metrics) are required before dropping them. More information about error handling can be found [here](/docs/configuration/error_handling). 
diff --git a/website/docs/components/processors/compress.md b/website/docs/components/processors/compress.md index ef5df4172d..bccba2f730 100644 --- a/website/docs/components/processors/compress.md +++ b/website/docs/components/processors/compress.md @@ -14,15 +14,13 @@ categories: ["Parsing"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Compresses messages according to the selected algorithm. Supported compression -algorithms are: gzip, pgzip, zlib, flate, snappy, lz4. +Compresses messages according to the selected algorithm. Supported compression algorithms are: [flate gzip lz4 pgzip snappy zlib] ```yml # Config fields, showing default values label: "" compress: - algorithm: "" + algorithm: "" # No default (required) level: -1 ``` @@ -36,8 +34,7 @@ The compression algorithm to use. Type: `string` -Default: `""` -Options: `gzip`, `pgzip`, `zlib`, `flate`, `snappy`, `lz4`. +Options: `flate`, `gzip`, `lz4`, `pgzip`, `snappy`, `zlib`. ### `level` diff --git a/website/docs/components/processors/decompress.md b/website/docs/components/processors/decompress.md index 92c23baa8d..45b5b9f9df 100644 --- a/website/docs/components/processors/decompress.md +++ b/website/docs/components/processors/decompress.md @@ -14,15 +14,13 @@ categories: ["Parsing"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Decompresses messages according to the selected algorithm. Supported -decompression types are: gzip, pgzip, zlib, bzip2, flate, snappy, lz4. +Decompresses messages according to the selected algorithm. Supported decompression algorithms are: [bzip2 flate gzip lz4 pgzip snappy zlib] ```yml # Config fields, showing default values label: "" decompress: - algorithm: "" + algorithm: "" # No default (required) ``` ## Fields @@ -33,7 +31,6 @@ The decompression algorithm to use. Type: `string` -Default: `""` -Options: `gzip`, `pgzip`, `zlib`, `bzip2`, `flate`, `snappy`, `lz4`. 
+Options: `bzip2`, `flate`, `gzip`, `lz4`, `pgzip`, `snappy`, `zlib`. diff --git a/website/docs/components/processors/dedupe.md b/website/docs/components/processors/dedupe.md index 916108e987..1e7c31f3be 100644 --- a/website/docs/components/processors/dedupe.md +++ b/website/docs/components/processors/dedupe.md @@ -20,8 +20,8 @@ Deduplicates messages by storing a key value in a cache using the `add` operator # Config fields, showing default values label: "" dedupe: - cache: "" - key: "" + cache: "" # No default (required) + key: ${! meta("kafka_key") } # No default (required) drop_on_err: true ``` @@ -47,7 +47,6 @@ The [`cache` resource](/docs/components/caches/about) to target with this proces Type: `string` -Default: `""` ### `key` @@ -56,7 +55,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` ```yml # Examples diff --git a/website/docs/components/processors/for_each.md b/website/docs/components/processors/for_each.md index cefe053b0f..d39c34f4a7 100644 --- a/website/docs/components/processors/for_each.md +++ b/website/docs/components/processors/for_each.md @@ -14,9 +14,7 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -A processor that applies a list of child processors to messages of a batch as -though they were each a batch of one message. +A processor that applies a list of child processors to messages of a batch as though they were each a batch of one message. ```yml # Config fields, showing default values @@ -24,12 +22,8 @@ label: "" for_each: [] ``` -This is useful for forcing batch wide processors such as -[`dedupe`](/docs/components/processors/dedupe) or interpolations such -as the `value` field of the `metadata` processor to execute -on individual message parts of a batch instead. 
+This is useful for forcing batch wide processors such as [`dedupe`](/docs/components/processors/dedupe) or interpolations such as the `value` field of the `metadata` processor to execute on individual message parts of a batch instead. -Please note that most processors already process per message of a batch, and -this processor is not needed in those cases. +Please note that most processors already process per message of a batch, and this processor is not needed in those cases. diff --git a/website/docs/components/processors/grok.md b/website/docs/components/processors/grok.md index ae5df57a6a..783bb03d9e 100644 --- a/website/docs/components/processors/grok.md +++ b/website/docs/components/processors/grok.md @@ -14,7 +14,6 @@ categories: ["Parsing"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Parses messages into a structured format by attempting to apply a list of Grok expressions, the first expression to result in at least one value replaces the original message with a JSON object containing the values. @@ -29,7 +28,7 @@ Parses messages into a structured format by attempting to apply a list of Grok e # Common config fields, showing default values label: "" grok: - expressions: [] + expressions: [] # No default (required) pattern_definitions: {} pattern_paths: [] ``` @@ -41,7 +40,7 @@ grok: # All config fields, showing default values label: "" grok: - expressions: [] + expressions: [] # No default (required) pattern_definitions: {} pattern_paths: [] named_captures_only: true @@ -102,7 +101,6 @@ One or more Grok expressions to attempt against incoming messages. 
The first exp Type: `array` -Default: `[]` ### `pattern_definitions` diff --git a/website/docs/components/processors/group_by.md b/website/docs/components/processors/group_by.md index 271ec46c71..498dbb8109 100644 --- a/website/docs/components/processors/group_by.md +++ b/website/docs/components/processors/group_by.md @@ -14,7 +14,6 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Splits a [batch of messages](/docs/configuration/batching) into N batches, where each resulting batch contains a group of messages determined by a [Bloblang query](/docs/guides/bloblang/about). ```yml @@ -35,7 +34,6 @@ A [Bloblang query](/docs/guides/bloblang/about) that should return a boolean val Type: `string` -Default: `""` ```yml # Examples diff --git a/website/docs/components/processors/group_by_value.md b/website/docs/components/processors/group_by_value.md index fd5d4b4b1a..305dfa232d 100644 --- a/website/docs/components/processors/group_by_value.md +++ b/website/docs/components/processors/group_by_value.md @@ -20,7 +20,7 @@ Splits a batch of messages into N batches, where each resulting batch contains a # Config fields, showing default values label: "" group_by_value: - value: "" + value: ${! meta("kafka_key") } # No default (required) ``` This allows you to group messages using arbitrary fields within their content or metadata, process them individually, and send them to unique locations as per their group. @@ -36,7 +36,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` ```yml # Examples @@ -48,9 +47,7 @@ value: ${! json("foo.bar") }-${! 
meta("baz") } ## Examples -If we were consuming Kafka messages and needed to group them by their key, -archive the groups, and send them to S3 with the key as part of the path we -could achieve that with the following: +If we were consuming Kafka messages and needed to group them by their key, archive the groups, and send them to S3 with the key as part of the path we could achieve that with the following: ```yaml pipeline: diff --git a/website/docs/components/processors/insert_part.md b/website/docs/components/processors/insert_part.md index 60402545e2..3e360dd0fa 100644 --- a/website/docs/components/processors/insert_part.md +++ b/website/docs/components/processors/insert_part.md @@ -14,9 +14,7 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Insert a new message into a batch at an index. If the specified index is greater -than the length of the existing batch it will be appended to the end. +Insert a new message into a batch at an index. If the specified index is greater than the length of the existing batch it will be appended to the end. ```yml # Config fields, showing default values @@ -26,17 +24,11 @@ insert_part: content: "" ``` -The index can be negative, and if so the message will be inserted from the end -counting backwards starting from -1. E.g. if index = -1 then the new message -will become the last of the batch, if index = -2 then the new message will be -inserted before the last message, and so on. If the negative index is greater -than the length of the existing batch it will be inserted at the beginning. +The index can be negative, and if so the message will be inserted from the end counting backwards starting from -1. E.g. if index = -1 then the new message will become the last of the batch, if index = -2 then the new message will be inserted before the last message, and so on. If the negative index is greater than the length of the existing batch it will be inserted at the beginning. 
-The new message will have metadata copied from the first pre-existing message of -the batch. +The new message will have metadata copied from the first pre-existing message of the batch. -This processor will interpolate functions within the 'content' field, you can -find a list of functions [here](/docs/configuration/interpolation#bloblang-queries). +This processor will interpolate functions within the 'content' field, you can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries). ## Fields diff --git a/website/docs/components/processors/jmespath.md b/website/docs/components/processors/jmespath.md index 0b12382c6b..e4b99c6569 100644 --- a/website/docs/components/processors/jmespath.md +++ b/website/docs/components/processors/jmespath.md @@ -14,15 +14,13 @@ categories: ["Mapping"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Executes a [JMESPath query](http://jmespath.org/) on JSON documents and replaces -the message with the resulting document. +Executes a [JMESPath query](http://jmespath.org/) on JSON documents and replaces the message with the resulting document. ```yml # Config fields, showing default values label: "" jmespath: - query: "" + query: "" # No default (required) ``` :::note Try out Bloblang @@ -38,7 +36,6 @@ The JMESPath query to apply to messages. Type: `string` -Default: `""` ## Examples diff --git a/website/docs/components/processors/jq.md b/website/docs/components/processors/jq.md index 8cb356355d..57d82bc569 100644 --- a/website/docs/components/processors/jq.md +++ b/website/docs/components/processors/jq.md @@ -14,7 +14,6 @@ categories: ["Mapping"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Transforms and filters messages using jq queries. @@ -29,7 +28,7 @@ Transforms and filters messages using jq queries. 
# Common config fields, showing default values label: "" jq: - query: "" + query: "" # No default (required) ``` @@ -39,7 +38,7 @@ jq: # All config fields, showing default values label: "" jq: - query: "" + query: "" # No default (required) raw: false output_raw: false ``` @@ -51,29 +50,19 @@ jq: For better performance and improved capabilities try out native Benthos mapping with the [`mapping` processor](/docs/components/processors/mapping). ::: -The provided query is executed on each message, targeting either the contents -as a structured JSON value or as a raw string using the field `raw`, -and the message is replaced with the query result. +The provided query is executed on each message, targeting either the contents as a structured JSON value or as a raw string using the field `raw`, and the message is replaced with the query result. -Message metadata is also accessible within the query from the variable -`$metadata`. +Message metadata is also accessible within the query from the variable `$metadata`. -This processor uses the [gojq library][gojq], and therefore does not require -jq to be installed as a dependency. However, this also means there are some -differences in how these queries are executed versus the jq cli which you can -[read about here][gojq-difference]. +This processor uses the [gojq library][gojq], and therefore does not require jq to be installed as a dependency. However, this also means there are some differences in how these queries are executed versus the jq cli which you can [read about here][gojq-difference]. -If the query does not emit any value then the message is filtered, if the query -returns multiple values then the resulting message will be an array containing -all values. +If the query does not emit any value then the message is filtered, if the query returns multiple values then the resulting message will be an array containing all values. The full query syntax is described in [jq's documentation][jq-docs]. 
## Error Handling -Queries can fail, in which case the message remains unchanged, errors are -logged, and the message is flagged as having failed, allowing you to use -[standard processor error handling patterns](/docs/configuration/error_handling). +Queries can fail, in which case the message remains unchanged, errors are logged, and the message is flagged as having failed, allowing you to use [standard processor error handling patterns](/docs/configuration/error_handling). ## Fields @@ -83,7 +72,6 @@ The jq query to filter and transform messages with. Type: `string` -Default: `""` ### `raw` diff --git a/website/docs/components/processors/json_schema.md b/website/docs/components/processors/json_schema.md index afbd093963..cd6bfa5dbc 100644 --- a/website/docs/components/processors/json_schema.md +++ b/website/docs/components/processors/json_schema.md @@ -14,21 +14,17 @@ categories: ["Mapping"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Checks messages against a provided JSONSchema definition but does not change the -payload under any circumstances. If a message does not match the schema it can -be caught using error handling methods outlined [here](/docs/configuration/error_handling). +Checks messages against a provided JSONSchema definition but does not change the payload under any circumstances. If a message does not match the schema it can be caught using error handling methods outlined [here](/docs/configuration/error_handling). ```yml # Config fields, showing default values label: "" json_schema: - schema: "" - schema_path: "" + schema: "" # No default (optional) + schema_path: "" # No default (optional) ``` -Please refer to the [JSON Schema website](https://json-schema.org/) for -information and tutorials regarding the syntax of the schema. +Please refer to the [JSON Schema website](https://json-schema.org/) for information and tutorials regarding the syntax of the schema. ## Fields @@ -38,7 +34,6 @@ A schema to apply. 
Use either this or the `schema_path` field. Type: `string` -Default: `""` ### `schema_path` @@ -46,7 +41,6 @@ The path of a schema document to apply. Use either this or the `schema` field. Type: `string` -Default: `""` ## Examples diff --git a/website/docs/components/processors/log.md b/website/docs/components/processors/log.md index 05db13545b..cae792aef8 100644 --- a/website/docs/components/processors/log.md +++ b/website/docs/components/processors/log.md @@ -21,7 +21,11 @@ Prints a log event for each message. Messages always remain unchanged. The log m label: "" log: level: INFO - fields_mapping: "" + fields_mapping: |- # No default (optional) + root.reason = "cus I wana" + root.id = this.id + root.age = this.user.age.number() + root.kafka_topic = meta("kafka_topic") message: "" ``` @@ -62,8 +66,6 @@ An optional [Bloblang mapping](/docs/guides/bloblang/about) that can be used to Type: `string` -Default: `""` -Requires version 3.40.0 or newer ```yml # Examples diff --git a/website/docs/components/processors/metric.md b/website/docs/components/processors/metric.md index 3c7cd9b9a3..8140c5e937 100644 --- a/website/docs/components/processors/metric.md +++ b/website/docs/components/processors/metric.md @@ -20,9 +20,9 @@ Emit custom metrics by extracting values from messages. # Config fields, showing default values label: "" metric: - type: "" - name: "" - labels: {} + type: "" # No default (required) + name: "" # No default (required) + labels: {} # No default (optional) value: "" ``` @@ -38,7 +38,6 @@ The metric [type](#types) to create. Type: `string` -Default: `""` Options: `counter`, `counter_by`, `gauge`, `timing`. 
### `name` @@ -47,7 +46,6 @@ The name of the metric to create, this must be unique across all Benthos compone Type: `string` -Default: `""` ### `labels` @@ -56,7 +54,6 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `object` -Default: `{}` ```yml # Examples diff --git a/website/docs/components/processors/parallel.md b/website/docs/components/processors/parallel.md index 3ccc43d30c..0cc52650ee 100644 --- a/website/docs/components/processors/parallel.md +++ b/website/docs/components/processors/parallel.md @@ -14,7 +14,6 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - A processor that applies a list of child processors to messages of a batch as though they were each a batch of one message (similar to the [`for_each`](/docs/components/processors/for_each) processor), but where each message is processed in parallel. ```yml @@ -22,7 +21,7 @@ A processor that applies a list of child processors to messages of a batch as th label: "" parallel: cap: 0 - processors: [] + processors: [] # No default (required) ``` The field `cap`, if greater than zero, caps the maximum number of parallel processing threads. @@ -45,6 +44,5 @@ A list of child processors to apply. Type: `array` -Default: `[]` diff --git a/website/docs/components/processors/parse_log.md b/website/docs/components/processors/parse_log.md index 9bc420e6c3..b0ca41ec9a 100644 --- a/website/docs/components/processors/parse_log.md +++ b/website/docs/components/processors/parse_log.md @@ -14,9 +14,7 @@ categories: ["Parsing"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Parses common log [formats](#formats) into [structured data](#codecs). This is -easier and often much faster than [`grok`](/docs/components/processors/grok). +Parses common log [formats](#formats) into [structured data](#codecs). This is easier and often much faster than [`grok`](/docs/components/processors/grok). 
@@ -41,8 +38,7 @@ parse_log: # All config fields, showing default values label: "" parse_log: - format: "" - codec: "" + format: "" # No default (required) best_effort: true allow_rfc3339: true default_year: current @@ -60,18 +56,8 @@ A common log [format](#formats) to parse. Type: `string` -Default: `""` Options: `syslog_rfc5424`, `syslog_rfc3164`. -### `codec` - -Specifies the structured format to parse a log into. - - -Type: `string` -Default: `""` -Options: `json`. - ### `best_effort` Still returns partially parsed messages even if an error occurs. @@ -112,8 +98,7 @@ Currently the only supported structured data codec is `json`. ### `syslog_rfc5424` -Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424) -spec. The resulting structured document may contain any of the following fields: +Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424) spec. The resulting structured document may contain any of the following fields: - `message` (string) - `timestamp` (string, RFC3339) @@ -129,8 +114,7 @@ spec. The resulting structured document may contain any of the following fields: ### `syslog_rfc3164` -Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164) -spec. The resulting structured document may contain any of the following fields: +Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164) spec. 
The resulting structured document may contain any of the following fields: - `message` (string) - `timestamp` (string, RFC3339) diff --git a/website/docs/components/processors/rate_limit.md b/website/docs/components/processors/rate_limit.md index 5abbff9750..234b32a490 100644 --- a/website/docs/components/processors/rate_limit.md +++ b/website/docs/components/processors/rate_limit.md @@ -14,17 +14,13 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Throttles the throughput of a pipeline according to a specified -[`rate_limit`](/docs/components/rate_limits/about) resource. Rate limits are -shared across components and therefore apply globally to all processing -pipelines. +Throttles the throughput of a pipeline according to a specified [`rate_limit`](/docs/components/rate_limits/about) resource. Rate limits are shared across components and therefore apply globally to all processing pipelines. ```yml # Config fields, showing default values label: "" rate_limit: - resource: "" + resource: "" # No default (required) ``` ## Fields @@ -35,6 +31,5 @@ The target [`rate_limit` resource](/docs/components/rate_limits/about). Type: `string` -Default: `""` diff --git a/website/docs/components/processors/resource.md b/website/docs/components/processors/resource.md index 5a7874431f..182245520c 100644 --- a/website/docs/components/processors/resource.md +++ b/website/docs/components/processors/resource.md @@ -14,7 +14,6 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Resource is a processor type that runs a processor resource identified by its label. 
```yml diff --git a/website/docs/components/processors/select_parts.md b/website/docs/components/processors/select_parts.md index d4fa5af19b..82232287aa 100644 --- a/website/docs/components/processors/select_parts.md +++ b/website/docs/components/processors/select_parts.md @@ -14,9 +14,7 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Cherry pick a set of messages from a batch by their index. Indexes larger than -the number of messages are simply ignored. +Cherry pick a set of messages from a batch by their index. Indexes larger than the number of messages are simply ignored. ```yml # Config fields, showing default values @@ -25,17 +23,11 @@ select_parts: parts: [] ``` -The selected parts are added to the new message batch in the same order as the -selection array. E.g. with 'parts' set to [ 2, 0, 1 ] and the message parts -[ '0', '1', '2', '3' ], the output will be [ '2', '0', '1' ]. +The selected parts are added to the new message batch in the same order as the selection array. E.g. with 'parts' set to [ 2, 0, 1 ] and the message parts [ '0', '1', '2', '3' ], the output will be [ '2', '0', '1' ]. -If none of the selected parts exist in the input batch (resulting in an empty -output message) the batch is dropped entirely. +If none of the selected parts exist in the input batch (resulting in an empty output message) the batch is dropped entirely. -Message indexes can be negative, and if so the part will be selected from the -end counting backwards starting from -1. E.g. if index = -1 then the selected -part will be the last part of the message, if index = -2 then the part before -the last element with be selected, and so on. +Message indexes can be negative, and if so the part will be selected from the end counting backwards starting from -1. E.g. if index = -1 then the selected part will be the last part of the message, if index = -2 then the part before the last element will be selected, and so on. 
This processor is only applicable to [batched messages](/docs/configuration/batching). diff --git a/website/docs/components/processors/sleep.md b/website/docs/components/processors/sleep.md index dad41254e9..61d2f19751 100644 --- a/website/docs/components/processors/sleep.md +++ b/website/docs/components/processors/sleep.md @@ -20,7 +20,7 @@ Sleep for a period of time specified as a duration string for each message. This # Config fields, showing default values label: "" sleep: - duration: "" + duration: "" # No default (required) ``` ## Fields @@ -32,6 +32,5 @@ This field supports [interpolation functions](/docs/configuration/interpolation# Type: `string` -Default: `""` diff --git a/website/docs/components/processors/split.md b/website/docs/components/processors/split.md index db5eb28d6c..658a549d56 100644 --- a/website/docs/components/processors/split.md +++ b/website/docs/components/processors/split.md @@ -14,7 +14,6 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Breaks message batches (synonymous with multiple part messages) into smaller batches. The size of the resulting batches are determined either by a discrete size or, if the field `byte_size` is non-zero, then by total size in bytes (which ever limit is reached first). ```yml diff --git a/website/docs/components/processors/subprocess.md b/website/docs/components/processors/subprocess.md index f797adac16..a68d392541 100644 --- a/website/docs/components/processors/subprocess.md +++ b/website/docs/components/processors/subprocess.md @@ -14,7 +14,6 @@ categories: ["Integration"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Executes a command as a subprocess and, for each message, will pipe its contents to the stdin stream of the process followed by a newline. 
@@ -29,7 +28,7 @@ Executes a command as a subprocess and, for each message, will pipe its contents # Common config fields, showing default values label: "" subprocess: - name: "" + name: cat # No default (required) args: [] ``` @@ -40,7 +39,7 @@ subprocess: # All config fields, showing default values label: "" subprocess: - name: "" + name: cat # No default (required) args: [] max_buffer: 65536 codec_send: lines @@ -78,7 +77,6 @@ The command to execute as a subprocess. Type: `string` -Default: `""` ```yml # Examples diff --git a/website/docs/components/processors/switch.md b/website/docs/components/processors/switch.md index 8db2123b28..c15a20d480 100644 --- a/website/docs/components/processors/switch.md +++ b/website/docs/components/processors/switch.md @@ -14,7 +14,6 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - Conditionally processes messages based on their contents. ```yml diff --git a/website/docs/components/processors/sync_response.md b/website/docs/components/processors/sync_response.md index c13a1c9290..1ada452586 100644 --- a/website/docs/components/processors/sync_response.md +++ b/website/docs/components/processors/sync_response.md @@ -14,9 +14,7 @@ categories: ["Utility"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Adds the payload in its current state as a synchronous response to the input -source, where it is dealt with according to that specific input type. +Adds the payload in its current state as a synchronous response to the input source, where it is dealt with according to that specific input type. ```yml # Config fields, showing default values @@ -24,10 +22,7 @@ label: "" sync_response: {} ``` -For most inputs this mechanism is ignored entirely, in which case the sync -response is dropped without penalty. It is therefore safe to use this processor -even when combining input types that might not have support for sync responses. 
-An example of an input able to utilise this is the `http_server`. +For most inputs this mechanism is ignored entirely, in which case the sync response is dropped without penalty. It is therefore safe to use this processor even when combining input types that might not have support for sync responses. An example of an input able to utilise this is the `http_server`. For more information please read [Synchronous Responses](/docs/guides/sync_responses). diff --git a/website/docs/components/processors/try.md b/website/docs/components/processors/try.md index ee6a9cf38d..0c047099d8 100644 --- a/website/docs/components/processors/try.md +++ b/website/docs/components/processors/try.md @@ -63,6 +63,3 @@ pipeline: ``` - - - diff --git a/website/docs/components/processors/while.md b/website/docs/components/processors/while.md index add39882ab..51486195d2 100644 --- a/website/docs/components/processors/while.md +++ b/website/docs/components/processors/while.md @@ -14,7 +14,6 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - A processor that checks a [Bloblang query](/docs/guides/bloblang/about/) against each batch of messages and executes child processors on them for as long as the query resolves to true. @@ -31,7 +30,7 @@ label: "" while: at_least_once: false check: "" - processors: [] + processors: [] # No default (required) ``` @@ -44,7 +43,7 @@ while: at_least_once: false max_loops: 0 check: "" - processors: [] + processors: [] # No default (required) ``` @@ -98,6 +97,5 @@ A list of child processors to execute on each loop. 
Type: `array` -Default: `[]` diff --git a/website/docs/components/processors/workflow.md b/website/docs/components/processors/workflow.md index 57fbb80170..5a649b9588 100644 --- a/website/docs/components/processors/workflow.md +++ b/website/docs/components/processors/workflow.md @@ -14,9 +14,7 @@ categories: ["Composition"] import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - -Executes a topology of [`branch` processors][processors.branch], -performing them in parallel where possible. +Executes a topology of [`branch` processors][processors.branch], performing them in parallel where possible. .result_map` diff --git a/website/docs/components/processors/xml.md b/website/docs/components/processors/xml.md index 745445d4c9..e43e9f4259 100644 --- a/website/docs/components/processors/xml.md +++ b/website/docs/components/processors/xml.md @@ -17,9 +17,7 @@ import TabItem from '@theme/TabItem'; :::caution BETA This component is mostly stable but breaking changes could still be made outside of major version releases if a fundamental problem with the component is found. ::: - -Parses messages as an XML document, performs a mutation on the data, and then -overwrites the previous contents with the new value. +Parses messages as an XML document, performs a mutation on the data, and then overwrites the previous contents with the new value. ```yml # Config fields, showing default values @@ -33,13 +31,10 @@ xml: ### `to_json` -Converts an XML document into a JSON structure, where elements appear as keys of -an object according to the following rules: +Converts an XML document into a JSON structure, where elements appear as keys of an object according to the following rules: -- If an element contains attributes they are parsed by prefixing a hyphen, - `-`, to the attribute label. -- If the element is a simple element and has attributes, the element value - is given the key `#text`. 
+- If an element contains attributes they are parsed by prefixing a hyphen, `-`, to the attribute label. +- If the element is a simple element and has attributes, the element value is given the key `#text`. - XML comments, directives, and process instructions are ignored. - When elements are repeated the resulting JSON value is an array.