Skip to content

Commit

Permalink
feat: removed reliance on terraform state for config hashes (#384)
Browse files Browse the repository at this point in the history
Removed storing the component config hash in terraform state, and
instead store it in a `.mach/hashes.json` file.

Because reading state requires a terraform init for every component
before checking for changes, the actual apply process is very
slow. By storing the hashes in a file on disk we can speed up the
process enormously, as we can now skip many of the inits up front.
  • Loading branch information
demeyerthom committed Mar 22, 2024
2 parents 97847be + c0cf7df commit 45aeea7
Show file tree
Hide file tree
Showing 64 changed files with 1,078 additions and 1,217 deletions.
3 changes: 3 additions & 0 deletions .changes/unreleased/Added-20240318-114635.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Added
body: Moved hash storage out of terraform
time: 2024-03-18T11:46:35.79447595+01:00
3 changes: 3 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ tasks:
test:
cmd: go test -race ./...

test:integration:
cmd: go test -tags=integration ./...

cover:
cmd: go test -race -coverprofile=coverage.out -covermode=atomic ./...

Expand Down
2 changes: 1 addition & 1 deletion docs/src/reference/cli/mach-composer_apply.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ mach-composer apply [flags]
-c, --component stringArray
--destroy Destroy option is a convenient way to destroy all remote objects managed by this mach config
-f, --file string YAML file to parse. (default "main.yml")
--force-init Force terraform initialization. By default mach-composer will reuse existing terraform resources
-h, --help help for apply
--ignore-change-detection Ignore change detection to run even if the components are considered up to date
--ignore-version Skip MACH composer version check
--output-path string Outputs path to store the generated files. (default "deployments")
--reuse Suppress a terraform init for improved speed (not recommended for production usage)
-s, --site string Site to parse. If not set parse all sites.
--var-file string Use a variable file to parse the configuration with.
-w, --workers int The number of workers to use (default 1)
Expand Down
2 changes: 1 addition & 1 deletion docs/src/reference/cli/mach-composer_plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ mach-composer plan [flags]
```
-c, --component stringArray
-f, --file string YAML file to parse. (default "main.yml")
--force-init Force terraform initialization. By default mach-composer will reuse existing terraform resources
-h, --help help for plan
--ignore-change-detection Ignore change detection to run even if the components are considered up to date
--ignore-version Skip MACH composer version check
--lock Acquire a lock on the state file before running terraform plan (default true)
--output-path string Outputs path to store the generated files. (default "deployments")
--reuse Suppress a terraform init for improved speed (not recommended for production usage)
-s, --site string Site to parse. If not set parse all sites.
--var-file string Use a variable file to parse the configuration with.
-w, --workers int The number of workers to use (default 1)
Expand Down
2 changes: 1 addition & 1 deletion docs/src/reference/cli/mach-composer_show-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ mach-composer show-plan [flags]

```
-f, --file string YAML file to parse. (default "main.yml")
--force-init Force terraform initialization. By default mach-composer will reuse existing terraform resources
-h, --help help for show-plan
--ignore-change-detection Ignore change detection to run even if the components are considered up to date
--ignore-version Skip MACH composer version check
--no-color Disable color output
--output-path string Outputs path to store the generated files. (default "deployments")
--reuse Suppress a terraform init for improved speed (not recommended for production usage)
-s, --site string Site to parse. If not set parse all sites.
--var-file string Use a variable file to parse the configuration with.
-w, --workers int The number of workers to use (default 1)
Expand Down
1 change: 0 additions & 1 deletion docs/src/reference/cli/mach-composer_terraform.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ mach-composer terraform [flags]
--ignore-change-detection Ignore change detection to run even if the components are considered up to date. Per default the proxy will ignore change detection (default true)
--ignore-version Skip MACH composer version check
--output-path string Outputs path to store the generated files. (default "deployments")
--reuse Suppress a terraform init for improved speed (not recommended for production usage)
-s, --site string Site to parse. If not set parse all sites.
--var-file string Use a variable file to parse the configuration with.
-w, --workers int The number of workers to use (default 1)
Expand Down
5 changes: 5 additions & 0 deletions internal/batcher/batcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package batcher

import "github.com/mach-composer/mach-composer-cli/internal/graph"

// BatchFunc groups the nodes of a graph into batches, returned as a map from
// an integer batch key to the nodes that belong to that batch.
// NOTE(review): the key presumably determines execution order (lowest first);
// confirm against the runner that consumes it.
type BatchFunc func(g *graph.Graph) map[int][]graph.Node
29 changes: 29 additions & 0 deletions internal/batcher/naive_batcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package batcher

import "github.com/mach-composer/mach-composer-cli/internal/graph"

// NaiveBatchFunc returns a BatchFunc that keys every vertex of the graph by
// the length of the longest route reported by g.Routes between that vertex and
// the graph's start node. Vertices sharing the same length end up in the same
// batch; a vertex without any route is placed in batch 0.
//
// Errors from g.Routes and g.Vertex are discarded because BatchFunc has no
// error return. The order of nodes inside a batch is unspecified, as it follows
// map iteration order.
func NaiveBatchFunc() BatchFunc {
	// longest reports the length of the longest route, or 0 if there is none.
	longest := func(routes []graph.Path) int {
		depth := 0
		for _, r := range routes {
			if l := len(r); l > depth {
				depth = l
			}
		}
		return depth
	}

	return func(g *graph.Graph) map[int][]graph.Node {
		// First pass: record the batch key for every vertex path.
		depths := make(map[string]int)
		for _, vertex := range g.Vertices() {
			routes, _ := g.Routes(vertex.Path(), g.StartNode.Path())
			depths[vertex.Path()] = longest(routes)
		}

		// Second pass: resolve each path back to its node and bucket it.
		result := make(map[int][]graph.Node)
		for vertexPath, depth := range depths {
			node, _ := g.Vertex(vertexPath)
			result[depth] = append(result[depth], node)
		}

		return result
	}
}
86 changes: 86 additions & 0 deletions internal/batcher/naive_batcher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package batcher

import (
"github.com/dominikbraun/graph"
internalgraph "github.com/mach-composer/mach-composer-cli/internal/graph"
"github.com/stretchr/testify/assert"
"testing"
)

func TestBatchNodesDepth1(t *testing.T) {
ig := graph.New(func(n internalgraph.Node) string { return n.Path() }, graph.Directed(), graph.Tree(), graph.PreventCycles())

start := new(internalgraph.NodeMock)
start.On("Path").Return("main/site-1")

_ = ig.AddVertex(start)

g := &internalgraph.Graph{Graph: ig, StartNode: start}

batches := NaiveBatchFunc()(g)

assert.Equal(t, 1, len(batches))
}

func TestBatchNodesDepth2(t *testing.T) {
ig := graph.New(func(n internalgraph.Node) string { return n.Path() }, graph.Directed(), graph.Tree(), graph.PreventCycles())

site := new(internalgraph.NodeMock)
site.On("Path").Return("main/site-1")

component1 := new(internalgraph.NodeMock)
component1.On("Path").Return("main/site-1/component-1")

component2 := new(internalgraph.NodeMock)
component2.On("Path").Return("main/site-1/component-2")

_ = ig.AddVertex(site)
_ = ig.AddVertex(component1)
_ = ig.AddVertex(component2)

_ = ig.AddEdge("main/site-1", "main/site-1/component-1")
_ = ig.AddEdge("main/site-1", "main/site-1/component-2")

g := &internalgraph.Graph{Graph: ig, StartNode: site}

batches := NaiveBatchFunc()(g)

assert.Equal(t, 2, len(batches))
assert.Equal(t, 1, len(batches[0]))
assert.Equal(t, "main/site-1", batches[0][0].Path())
assert.Equal(t, 2, len(batches[1]))
assert.Contains(t, batches[1][0].Path(), "component")
assert.Contains(t, batches[1][1].Path(), "component")
}

func TestBatchNodesDepth3(t *testing.T) {
ig := graph.New(func(n internalgraph.Node) string { return n.Path() }, graph.Directed(), graph.Tree(), graph.PreventCycles())

site := new(internalgraph.NodeMock)
site.On("Path").Return("main/site-1")

component1 := new(internalgraph.NodeMock)
component1.On("Path").Return("main/site-1/component-1")

component2 := new(internalgraph.NodeMock)
component2.On("Path").Return("main/site-1/component-2")

_ = ig.AddVertex(site)
_ = ig.AddVertex(component1)
_ = ig.AddVertex(component2)

_ = ig.AddEdge("main/site-1", "main/site-1/component-1")
_ = ig.AddEdge("main/site-1/component-1", "main/site-1/component-2")

g := &internalgraph.Graph{Graph: ig, StartNode: site}

batches := NaiveBatchFunc()(g)

assert.Equal(t, 3, len(batches))
assert.Equal(t, 1, len(batches[0]))
assert.Equal(t, "main/site-1", batches[0][0].Path())
assert.Equal(t, 1, len(batches[1]))
assert.Contains(t, batches[1][0].Path(), "main/site-1/component-1")
assert.Equal(t, 1, len(batches[2]))
assert.Contains(t, batches[2][0].Path(), "main/site-1/component-2")
}
19 changes: 11 additions & 8 deletions internal/cmd/apply.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package cmd

import (
"github.com/mach-composer/mach-composer-cli/internal/batcher"
"github.com/mach-composer/mach-composer-cli/internal/graph"
"github.com/mach-composer/mach-composer-cli/internal/hash"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"

Expand All @@ -10,7 +12,7 @@ import (
)

var applyFlags struct {
reuse bool
forceInit bool
autoApprove bool
destroy bool
components []string
Expand All @@ -32,7 +34,7 @@ var applyCmd = &cobra.Command{

func init() {
registerCommonFlags(applyCmd)
applyCmd.Flags().BoolVarP(&applyFlags.reuse, "reuse", "", false, "Suppress a terraform init for improved speed (not recommended for production usage)")
applyCmd.Flags().BoolVarP(&applyFlags.forceInit, "force-init", "", false, "Force terraform initialization. By default mach-composer will reuse existing terraform resources")
applyCmd.Flags().BoolVarP(&applyFlags.autoApprove, "auto-approve", "", false, "Suppress a terraform init for improved speed (not recommended for production usage)")
applyCmd.Flags().BoolVarP(&applyFlags.destroy, "destroy", "", false, "Destroy option is a convenient way to destroy all remote objects managed by this mach config")
applyCmd.Flags().StringArrayVarP(&applyFlags.components, "component", "c", nil, "")
Expand Down Expand Up @@ -61,13 +63,14 @@ func applyFunc(cmd *cobra.Command, _ []string) error {
return err
}

b := runner.NewGraphRunner(commonFlags.workers)
r := runner.NewGraphRunner(
batcher.NaiveBatchFunc(),
hash.Factory(cfg),
commonFlags.workers,
)

if err = checkReuse(ctx, dg, b, applyFlags.reuse); err != nil {
return err
}

return b.TerraformApply(ctx, dg, &runner.ApplyOptions{
return r.TerraformApply(ctx, dg, &runner.ApplyOptions{
ForceInit: applyFlags.forceInit,
Destroy: applyFlags.destroy,
AutoApprove: applyFlags.autoApprove,
IgnoreChangeDetection: applyFlags.ignoreChangeDetection,
Expand Down
130 changes: 130 additions & 0 deletions internal/cmd/apply_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//go:build integration

package cmd

import (
"github.com/stretchr/testify/assert"
"os"
"os/exec"
"path"
"testing"
)

import (
"github.com/stretchr/testify/suite"
)

// ApplyTestSuite is the testify suite holding the integration tests for the
// apply command.
type ApplyTestSuite struct {
	suite.Suite
	// tempDir is the directory assigned in SetupSuite (also exported there as
	// TF_PLUGIN_CACHE_DIR) and removed again in TearDownSuite.
	tempDir string
}

func TestExampleTestSuite(t *testing.T) {
suite.Run(t, new(ApplyTestSuite))
}

// SetupSuite runs once before the suite. It aborts the suite when the
// terraform binary is not on PATH, then creates a temporary directory and
// exposes it to terraform as the plugin cache through environment variables.
func (s *ApplyTestSuite) SetupSuite() {
	if _, err := exec.LookPath("terraform"); err != nil {
		s.T().Fatal("terraform command not found")
	}

	// The first argument of os.MkdirTemp is the parent directory and the second
	// is the name pattern. Passing "" creates the directory under the system
	// default temp location instead of a relative "mach-composer" directory
	// that is not guaranteed to exist.
	tmpDir, err := os.MkdirTemp("", "mach-composer-test")
	if err != nil {
		s.T().Fatalf("creating terraform plugin cache dir: %v", err)
	}
	// Record the directory right away so TearDownSuite can remove it.
	s.tempDir = tmpDir

	if err := os.Setenv("TF_PLUGIN_CACHE_DIR", tmpDir); err != nil {
		s.T().Fatalf("setting TF_PLUGIN_CACHE_DIR: %v", err)
	}
	if err := os.Setenv("TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE", "1"); err != nil {
		s.T().Fatalf("setting TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE: %v", err)
	}
}

// TearDownSuite removes the directory stored in s.tempDir and everything
// below it once the suite has finished.
func (s *ApplyTestSuite) TearDownSuite() {
	// Best-effort cleanup: a failure to remove the directory is ignored.
	_ = os.RemoveAll(s.tempDir)
}

// cleanWorkingDir deletes the artifacts a test run leaves inside workdir: the
// "deployments" and "states" directories and the "hashes.json" file. Entries
// that do not exist are skipped silently; any other removal error panics.
func cleanWorkingDir(workdir string) {
	for _, artifact := range []string{"deployments", "states", "hashes.json"} {
		if err := os.RemoveAll(path.Join(workdir, artifact)); err != nil {
			panic(err)
		}
	}
}

// TestApplySimple runs `apply --auto-approve` against the "simple" fixture and
// checks that the hashes file, the generated main.tf, the terraform state and
// the component output file exist afterwards.
func (s *ApplyTestSuite) TestApplySimple() {
	t := s.T()

	pwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	workdir := path.Join(pwd, "testdata/cases/apply/simple")
	defer cleanWorkingDir(workdir)

	// t.Setenv restores the previous value when the test ends, so the variable
	// does not leak into other tests running in the same process.
	t.Setenv("MC_HASH_FILE", path.Join(workdir, "hashes.json"))

	cmd := RootCmd
	cmd.SetArgs([]string{
		"apply",
		"--output-path", path.Join(workdir, "deployments"),
		"--file", path.Join(workdir, "main.yaml"),
		"--auto-approve",
	})
	err = cmd.Execute()
	assert.NoError(t, err)

	expected := []string{
		"hashes.json",
		"deployments/main/test-1/main.tf",
		"deployments/main/test-1/states/test-1.tfstate",
		"deployments/main/test-1/outputs/component-1.json",
	}
	for _, rel := range expected {
		assert.FileExists(t, path.Join(workdir, rel))
	}
}

// TestApplySplitState runs `apply --auto-approve` against the "split-state"
// fixture and checks that the hashes file, the generated main.tf files for the
// site and for component-2, and both state files under the states directory
// exist afterwards.
func (s *ApplyTestSuite) TestApplySplitState() {
	t := s.T()

	pwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	workdir := path.Join(pwd, "testdata/cases/apply/split-state")
	defer cleanWorkingDir(workdir)

	// t.Setenv restores the previous values when the test ends. Without it
	// STATES_PATH would stay set for every test that runs afterwards.
	t.Setenv("MC_HASH_FILE", path.Join(workdir, "hashes.json"))
	t.Setenv("STATES_PATH", path.Join(workdir, "states"))

	cmd := RootCmd
	cmd.SetArgs([]string{
		"apply",
		"--output-path", path.Join(workdir, "deployments"),
		"--file", path.Join(workdir, "main.yaml"),
		"--auto-approve",
	})
	err = cmd.Execute()
	assert.NoError(t, err)

	expected := []string{
		"hashes.json",
		"deployments/main/test-1/main.tf",
		"deployments/main/test-1/component-2/main.tf",
		"states/test-1.tfstate",
		"states/component-2.tfstate",
	}
	for _, rel := range expected {
		assert.FileExists(t, path.Join(workdir, rel))
	}
}

// TestApplyNoHashesFile checks that apply recreates the hashes file: it runs
// apply once, deletes the resulting hashes.json, runs apply again and expects
// the file to exist once more.
func (s *ApplyTestSuite) TestApplyNoHashesFile() {
	t := s.T()

	pwd, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}
	workdir := path.Join(pwd, "testdata/cases/apply/simple")
	defer cleanWorkingDir(workdir)

	hashFile := path.Join(workdir, "hashes.json")

	// t.Setenv restores the previous value when the test ends, so the variable
	// does not leak into other tests running in the same process.
	t.Setenv("MC_HASH_FILE", hashFile)

	cmd := RootCmd
	cmd.SetArgs([]string{
		"apply",
		"--output-path", path.Join(workdir, "deployments"),
		"--file", path.Join(workdir, "main.yaml"),
		"--auto-approve",
	})

	// First run: the hashes file is created.
	err = cmd.Execute()
	assert.NoError(t, err)
	assert.FileExists(t, hashFile)

	// Remove the file while leaving the rest of the working dir in place.
	if err = os.RemoveAll(hashFile); err != nil {
		t.Fatal(err)
	}

	// Second run: apply must succeed and write the hashes file again.
	err = cmd.Execute()
	assert.NoError(t, err)
	assert.FileExists(t, hashFile)
}
12 changes: 0 additions & 12 deletions internal/cmd/common.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
package cmd

import (
"context"
"errors"
"fmt"
"github.com/mach-composer/mach-composer-cli/internal/cloud"
"github.com/mach-composer/mach-composer-cli/internal/graph"
"github.com/mach-composer/mach-composer-cli/internal/runner"
"os"
"path"
"path/filepath"
Expand Down Expand Up @@ -110,12 +107,3 @@ func loadConfig(cmd *cobra.Command, resolveVars bool) *config.MachConfig {

return cfg
}

// checkReuse runs a terraform init over the graph via the runner unless reuse
// is set. With reuse set it only logs that the existing state is reused and
// returns nil.
func checkReuse(ctx context.Context, dg *graph.Graph, b *runner.GraphRunner, reuse bool) error {
	if !reuse {
		return b.TerraformInit(ctx, dg)
	}

	log.Info().Msgf("Reusing existing terraform state")
	return nil
}
Loading

0 comments on commit 45aeea7

Please sign in to comment.