From 4790ab6f27e364cc070b1d003a9d9cc3aa1aa95d Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Tue, 14 Jun 2022 12:16:56 -0700 Subject: [PATCH] hclsyntax: Short-circuit behavior for the && and || operators Previously we just always evaluated both operands of any binary operator and then executed the operator. For the logical operators that was inconsistent with their treatment in several other languages with a similar grammar to ours, leading to users being surprised that HCL didn't short circuit the same way as their favorite other languages did. Our initial exclusion of short-circuiting here was, as with various other parts of HCL's design, motivated by the goals of producing consistent results even when unknown values are present and of proactively returning as many errors as possible in order to give better context when debugging. However, in acknowledgement of the fact that logical operator short-circuiting has considerable ergonomic benefits when writing out compound conditional expressions where later terms rely on the guarantees of earlier terms, this commit implements a compromise design where we can get the short-circuit benefits for dynamic-value-related errors without losing our ability to proactively detect type-related errors even when short-circuiting. Specifically, if a particular operator participates in short-circuiting then it gets an opportunity to decide for a particular known LHS value whether to short-circuit. If it decides to do so then HCL will evaluate the RHS in a type-checking-only mode, where we ignore any specific values in the variable scope but will still raise errors if the RHS expression tries to do anything to them that is inconsistent with their type. If the LHS of a short-circuit-capable operator turns out to be an unknown value of a suitable type, we'll pessimistically treat it as a short-circuit and defer full evaluation of the RHS until we have more information, so as to avoid raising errors that would be guarded away once the LHS becomes known. The effect of this compromise is that it's possible to use the short-circuit operators in common value-related guard situations, like checking whether a value is null or is a number that would be valid to index a particular list: foo != null && foo.something idx < 3 && foo[idx] On the other hand though, it is _not_ possible to use the behavior to guard against problems that are related to types rather than to values, which allows us to still proactively detect various kinds of errors that are likely to be mistakes: foo != null && fo.something # Typoed "foo" as "fo" foo != null && foo.smething # Typoed "something" as "smething" num < 3 && num.something # Numbers never have attributes Those coming from dynamic languages will probably still find these errors surprising, but HCL's other features follow a similar sort of hybrid model of statically checking types where possible and that tradeoff seems to have worked well to make it possible to express more complicated situations while still providing some of the help typically expected from a static type system for "obviously wrong" situations. It should typically be possible to adjust an expression to make it friendly to short-circuit guard style by being more precise about the types being used. --- eval_context.go | 34 +++++++++++ hclsyntax/expression_ops.go | 62 ++++++++++++++++++- hclsyntax/expression_test.go | 111 +++++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 3 deletions(-) diff --git a/eval_context.go b/eval_context.go index 915910ad..87f9bb58 100644 --- a/eval_context.go +++ b/eval_context.go @@ -23,3 +23,37 @@ func (ctx *EvalContext) NewChild() *EvalContext { func (ctx *EvalContext) Parent() *EvalContext { return ctx.parent } + +// NewChildAllVariablesUnknown is an extension of NewChild which, in addition +// to creating a child context, also pre-populates its Variables table +// with variable definitions masking every variable define in the reciever +// and its ancestors with an unknown value of the same type as the original. +// +// The child does not initially have any of its own functions defined, and so +// it can still inherit any defined functions from the reciever. +// +// Because is function effectively takes a snapshot of the variables as they +// are defined at the time of the call, it is incorrect to subsequently +// modify the variables in any of the ancestor contexts in a way that would +// change which variables are defined or what value types they each have. +// +// This is a specialized helper function intended to support type-checking +// use-cases, where the goal is only to check whether values are being used +// in a way that makes sense for their types while not reacting to their +// actual values. +func (ctx *EvalContext) NewChildAllVariablesUnknown() *EvalContext { + ret := ctx.NewChild() + ret.Variables = make(map[string]cty.Value) + + currentAncestor := ctx + for currentAncestor != nil { + for name, val := range currentAncestor.Variables { + if _, ok := ret.Variables[name]; !ok { + ret.Variables[name] = cty.UnknownVal(val.Type()) + } + } + currentAncestor = currentAncestor.parent + } + + return ret +} diff --git a/hclsyntax/expression_ops.go b/hclsyntax/expression_ops.go index a351096d..f6674949 100644 --- a/hclsyntax/expression_ops.go +++ b/hclsyntax/expression_ops.go @@ -13,16 +13,39 @@ import ( type Operation struct { Impl function.Function Type cty.Type + + // ShortCircuit is an optional callback for binary operations which, if + // set, will be called with the result of evaluating the LHS expression. + // + // ShortCircuit may return cty.NilVal to allow evaluation to proceed + // as normal, or it may return a non-nil value to force the operation + // to return that value and perform only type checking on the RHS + // expression, as opposed to full evaluation. + ShortCircuit func(lhs cty.Value) cty.Value } var ( OpLogicalOr = &Operation{ Impl: stdlib.OrFunc, Type: cty.Bool, + + ShortCircuit: func(lhs cty.Value) cty.Value { + if lhs.RawEquals(cty.True) { + return cty.True + } + return cty.NilVal + }, } OpLogicalAnd = &Operation{ Impl: stdlib.AndFunc, Type: cty.Bool, + + ShortCircuit: func(lhs cty.Value) cty.Value { + if lhs.RawEquals(cty.False) { + return cty.False + } + return cty.NilVal + }, } OpLogicalNot = &Operation{ Impl: stdlib.NotFunc, @@ -146,10 +169,7 @@ func (e *BinaryOpExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) var diags hcl.Diagnostics givenLHSVal, lhsDiags := e.LHS.Value(ctx) - givenRHSVal, rhsDiags := e.RHS.Value(ctx) diags = append(diags, lhsDiags...) - diags = append(diags, rhsDiags...) - lhsVal, err := convert.Convert(givenLHSVal, lhsParam.Type) if err != nil { diags = append(diags, &hcl.Diagnostic{ @@ -162,6 +182,35 @@ func (e *BinaryOpExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) EvalContext: ctx, }) } + + // If this is a short-circuiting operator and the LHS produces a + // short-circuiting result then we'll evaluate the RHS only for type + // checking purposes, ignoring any specific values, as a compromise + // between the convenience of a total short-circuit behavior and the + // benefit of not masking type errors on the RHS that we could still + // give earlier feedback about. + var forceResult cty.Value + rhsCtx := ctx + if e.Op.ShortCircuit != nil { + if !givenLHSVal.IsKnown() { + // If this is a short-circuit operator and our LHS value is + // unknown then we can't predict whether we would short-circuit + // yet, and so we must proceed under the assumption that we _will_ + // short-circuit to avoid raising any errors on the RHS that would + // eventually be hidden by the short-circuit behavior once LHS + // becomes known. + forceResult = cty.UnknownVal(e.Op.Type) + rhsCtx = ctx.NewChildAllVariablesUnknown() + } else if forceResult = e.Op.ShortCircuit(givenLHSVal); forceResult != cty.NilVal { + // This ensures that we'll only be type-checking against any + // variables used on the RHS, while not raising any errors about + // their values. + rhsCtx = ctx.NewChildAllVariablesUnknown() + } + } + + givenRHSVal, rhsDiags := e.RHS.Value(rhsCtx) + diags = append(diags, rhsDiags...) rhsVal, err := convert.Convert(givenRHSVal, rhsParam.Type) if err != nil { diags = append(diags, &hcl.Diagnostic{ @@ -181,6 +230,13 @@ func (e *BinaryOpExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) return cty.UnknownVal(e.Op.Type), diags } + // If we short-circuited above and still passed the type-check of RHS then + // we'll halt here and return the short-circuit result rather than actually + // executing the opertion. + if forceResult != cty.NilVal { + return forceResult, diags + } + args := []cty.Value{lhsVal, rhsVal} result, err := impl.Call(args) if err != nil { diff --git a/hclsyntax/expression_test.go b/hclsyntax/expression_test.go index 77d959fb..0c2840a9 100644 --- a/hclsyntax/expression_test.go +++ b/hclsyntax/expression_test.go @@ -1740,6 +1740,64 @@ EOT cty.False, 0, }, + { + // Logical AND operator short-circuit behavior + `nullobj != null && nullobj.is_thingy`, + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "nullobj": cty.NullVal(cty.Object(map[string]cty.Type{ + "is_thingy": cty.Bool, + })), + }, + }, + cty.False, + 0, // nullobj != null prevents evaluating nullobj.is_thingy + }, + { + // Logical AND short-circuit handling of unknown values + // If the first operand is an unknown bool then we can't know if + // we will short-circuit or not, and so we must assume we will + // and wait until the value becomes known before fully evaluating RHS. + `unknown < 4 && list[zero]`, + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "unknown": cty.UnknownVal(cty.Number), + "zero": cty.Zero, + "list": cty.ListValEmpty(cty.Bool), + }, + }, + cty.UnknownVal(cty.Bool), + 0, + }, + { + // Logical OR operator short-circuit behavior + `nullobj == null || nullobj.is_thingy`, + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "nullobj": cty.NullVal(cty.Object(map[string]cty.Type{ + "is_thingy": cty.Bool, + })), + }, + }, + cty.True, + 0, // nullobj == null prevents evaluating nullobj.is_thingy + }, + { + // Logical OR short-circuit handling of unknown values + // If the first operand is an unknown bool then we can't know if + // we will short-circuit or not, and so we must assume we will + // and wait until the value becomes known before fully evaluating RHS. + `unknown > 4 || list[zero]`, + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "unknown": cty.UnknownVal(cty.Number), + "zero": cty.Zero, + "list": cty.ListValEmpty(cty.Bool), + }, + }, + cty.UnknownVal(cty.Bool), + 0, + }, { `true ? var : null`, &hcl.EvalContext{ @@ -1983,6 +2041,59 @@ func TestExpressionErrorMessages(t *testing.T) { // describe coherently. "The true and false result expressions must have consistent types. At least one deeply-nested attribute or element is not compatible across both the 'true' and the 'false' value.", }, + + // Error messages describing situations where the logical operator + // short-circuit behavior still found a type error on the RHS that + // we therefore still report, because the LHS only guards against + // value-related problems in the RHS. + { + // It's not valid to access an attribute on a non-object-typed + // value even if we've proven it isn't null. + "notobj != null && notobj.foo", + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "notobj": cty.True, + }, + }, + "Unsupported attribute", + "Can't access attributes on a primitive-typed value (bool).", + }, + { + // It's not valid to access an attribute on a non-object-typed + // value even if we've proven it isn't null. + "notobj == null || notobj.foo", + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "notobj": cty.True, + }, + }, + "Unsupported attribute", + "Can't access attributes on a primitive-typed value (bool).", + }, + { + // It's not valid to access an index on an unindexable type + // even if we've proven it isn't null. + "notlist != null && notlist[0]", + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "notlist": cty.True, + }, + }, + "Invalid index", + "This value does not have any indices.", + }, + { + // Short-circuit can't avoid an error accessing a variable that + // doesn't exist at all, so we can still report typos. + "value != null && valeu", + &hcl.EvalContext{ + Variables: map[string]cty.Value{ + "value": cty.True, + }, + }, + "Unknown variable", + `There is no variable named "valeu". Did you mean "value"?`, + }, } for _, test := range tests {