From 305d10b193eaea825d365f1d388c5b1942c42ba8 Mon Sep 17 00:00:00 2001 From: Anish Ramasekar Date: Thu, 26 Jan 2023 09:11:00 -0800 Subject: [PATCH] feat: use cert ready checker for webhook readiness (#721) Signed-off-by: Anish Ramasekar --- cmd/webhook/main.go | 38 +++++++++++++------ config/manager/manager.yaml | 5 +++ ...webhook-controller-manager-deployment.yaml | 5 +++ manifest_staging/deploy/azure-wi-webhook.yaml | 5 +++ scripts/ci-e2e.sh | 4 +- 5 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go index 949f95fca..cc472887d 100644 --- a/cmd/webhook/main.go +++ b/cmd/webhook/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "fmt" + "net/http" "github.com/open-policy-agent/cert-controller/pkg/rotator" "k8s.io/apimachinery/pkg/api/meta" @@ -13,7 +14,6 @@ import ( "monis.app/mlog" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" - "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" @@ -146,14 +146,7 @@ func mainErr() error { close(setupFinished) } - if err := mgr.AddReadyzCheck("ping", healthz.Ping); err != nil { - return fmt.Errorf("entrypoint: unable to create ready check: %w", err) - } - - if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { - return fmt.Errorf("entrypoint: unable to create health check: %w", err) - } - + setupProbeEndpoints(mgr, setupFinished) go setupWebhook(mgr, setupFinished) entryLog.Info("starting manager") @@ -168,11 +161,10 @@ func setupWebhook(mgr manager.Manager, setupFinished chan struct{}) { // Block until the setup (certificate generation) finishes. <-setupFinished - // setup webhooks - entryLog.Info("setting up webhook server") hookServer := mgr.GetWebhookServer() hookServer.TLSMinVersion = tlsMinVersion + // setup webhooks entryLog.Info("registering webhook to the webhook server") podMutator, err := wh.NewPodMutator(mgr.GetClient(), mgr.GetAPIReader(), arcCluster, audience) if err != nil { @@ -180,3 +172,27 @@ func setupWebhook(mgr manager.Manager, setupFinished chan struct{}) { } hookServer.Register("/mutate-v1-pod", &webhook.Admission{Handler: podMutator}) } + +func setupProbeEndpoints(mgr ctrl.Manager, setupFinished chan struct{}) { + // Block readiness on the mutating webhook being registered. + // We can't use mgr.GetWebhookServer().StartedChecker() yet, + // because that starts the webhook. But we also can't call AddReadyzCheck + // after Manager.Start. So we need a custom ready check that delegates to + // the real ready check after the cert has been injected and validator started. + checker := func(req *http.Request) error { + select { + case <-setupFinished: + return mgr.GetWebhookServer().StartedChecker()(req) + default: + return fmt.Errorf("certs are not ready yet") + } + } + + if err := mgr.AddHealthzCheck("healthz", checker); err != nil { + panic(fmt.Errorf("unable to add healthz check: %w", err)) + } + if err := mgr.AddReadyzCheck("readyz", checker); err != nil { + panic(fmt.Errorf("unable to add readyz check: %w", err)) + } + entryLog.Info("added healthz and readyz check") +} diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 582b6c1aa..f993b8174 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -52,10 +52,15 @@ spec: httpGet: path: /readyz port: healthz + initialDelaySeconds: 5 + periodSeconds: 5 livenessProbe: httpGet: path: /healthz port: healthz + initialDelaySeconds: 15 + periodSeconds: 20 + failureThreshold: 6 resources: limits: cpu: 100m diff --git a/manifest_staging/charts/workload-identity-webhook/templates/azure-wi-webhook-controller-manager-deployment.yaml b/manifest_staging/charts/workload-identity-webhook/templates/azure-wi-webhook-controller-manager-deployment.yaml index 94abe69ea..761c28d12 100644 --- a/manifest_staging/charts/workload-identity-webhook/templates/azure-wi-webhook-controller-manager-deployment.yaml +++ b/manifest_staging/charts/workload-identity-webhook/templates/azure-wi-webhook-controller-manager-deployment.yaml @@ -47,9 +47,12 @@ spec: image: '{{ .Values.image.repository }}:{{ .Values.image.release }}' imagePullPolicy: '{{ .Values.image.pullPolicy }}' livenessProbe: + failureThreshold: 6 httpGet: path: /healthz port: healthz + initialDelaySeconds: 15 + periodSeconds: 20 name: manager ports: - containerPort: {{ trimPrefix ":" .Values.metricsAddr }} @@ -65,6 +68,8 @@ spec: httpGet: path: /readyz port: healthz + initialDelaySeconds: 5 + periodSeconds: 5 resources: {{- toYaml .Values.resources | nindent 10 }} securityContext: diff --git a/manifest_staging/deploy/azure-wi-webhook.yaml b/manifest_staging/deploy/azure-wi-webhook.yaml index b32a59bfa..94dc1ab51 100644 --- a/manifest_staging/deploy/azure-wi-webhook.yaml +++ b/manifest_staging/deploy/azure-wi-webhook.yaml @@ -171,9 +171,12 @@ spec: image: mcr.microsoft.com/oss/azure/workload-identity/webhook:v0.15.0 imagePullPolicy: IfNotPresent livenessProbe: + failureThreshold: 6 httpGet: path: /healthz port: healthz + initialDelaySeconds: 15 + periodSeconds: 20 name: manager ports: - containerPort: 9443 @@ -189,6 +192,8 @@ spec: httpGet: path: /readyz port: healthz + initialDelaySeconds: 5 + periodSeconds: 5 resources: limits: cpu: 100m diff --git a/scripts/ci-e2e.sh b/scripts/ci-e2e.sh index 5ed9c8e29..00139b7cb 100755 --- a/scripts/ci-e2e.sh +++ b/scripts/ci-e2e.sh @@ -66,7 +66,7 @@ main() { create_cluster make deploy - poll_webhook_readiness + ${KUBECTL} wait --for=condition=available --timeout=5m deployment/azure-wi-webhook-controller-manager -n azure-workload-identity-system if [[ -n "${WINDOWS_NODE_NAME:-}" ]]; then E2E_ARGS="--node-os-distro=windows ${E2E_ARGS:-}" @@ -106,7 +106,7 @@ test_helm_chart() { --wait \ --debug \ -v=5 - poll_webhook_readiness + ${KUBECTL} wait --for=condition=available --timeout=5m deployment/azure-wi-webhook-controller-manager -n azure-workload-identity-system make test-e2e-run }