Commit: update v1beta docs

Oguzhan Yilmaz committed Nov 4, 2023
1 parent 588c77f commit 30bffe0
Showing 7 changed files with 297 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,3 +1,5 @@
docs/node-role-trust-relationship.json
docs/trash.md
site/
site/
.vscode
karpenter-cloudformation.yaml
@@ -1,5 +1,15 @@
# Troubleshooting Guide

### Find which resources are tagged with `karpenter.sh/discovery=${CLUSTER_NAME}`

```bash
# List all resources with the tag: Key=karpenter.sh/discovery,Values=${CLUSTER_NAME}
aws resourcegroupstaggingapi get-resources \
--tag-filters "Key=karpenter.sh/discovery,Values=${CLUSTER_NAME}" \
--query 'ResourceTagMappingList[]' --output text \
| sed 's/arn:/\n----------\narn:/g'
```
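The `sed` in the command above only inserts a visual separator before each `arn:` so the flat `--output text` dump is easier to scan. A quick local illustration on a sample line (no AWS call needed; the account ID and resource IDs are placeholders):

```shell
# A line in the shape of `aws resourcegroupstaggingapi ... --output text` output
sample='arn:aws:ec2:eu-central-1:111122223333:security-group/sg-0abc arn:aws:ec2:eu-central-1:111122223333:subnet/subnet-0def'
# Insert a separator line before every ARN (GNU sed: \n in the replacement)
echo "$sample" | sed 's/arn:/\n----------\narn:/g'
```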

### Helpful bash functions

```bash
```
90 changes: 89 additions & 1 deletion docs/karpenter-v1beta-configuration.md
@@ -1 +1,89 @@
coming soon...
# Demo: Karpenter v1beta

Notes:

- EKS AMI bootstrap script: https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh
- Check the `karpenter.k8s.aws/instance-pods` label on provisioned nodes (instance type reference: https://karpenter.sh/docs/reference/instance-types/).
- The EKS log collector script can collect userdata and kubelet logs: https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/linux/README.md

## Installation

```bash title="Export env. variables we will use in this demo"
export KARPENTER_VERSION=v0.32.1
export K8S_VERSION=1.24
export AWS_PAGER="" # disable the aws cli pager
export AWS_PROFILE=hepapi
export AWS_REGION=eu-central-1
export CLUSTER_NAME="bambi" # will be created with eksctl
```

### Create an EKS cluster with Karpenter

```bash
eksctl create cluster -f - <<EOF
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: ${CLUSTER_NAME}
  region: ${AWS_REGION}
  version: "${K8S_VERSION}"
  tags:
    karpenter.sh/discovery: ${CLUSTER_NAME} # here, it is set to the cluster name
iam:
  withOIDC: true
karpenter:
  version: "${KARPENTER_VERSION}"
  createServiceAccount: true
  defaultInstanceProfile: "KarpenterNodeInstanceProfile-${CLUSTER_NAME}"
  withSpotInterruptionQueue: true
managedNodeGroups:
  - instanceType: m5.large
    amiFamily: AmazonLinux2
    name: ${CLUSTER_NAME}-ng
    desiredCapacity: 2
    minSize: 1
    maxSize: 5
EOF
```

### Find all AWS resources tagged with `karpenter.sh/discovery=${CLUSTER_NAME}`

Check if you got everything tagged correctly.

It should include at least the following resources (the same ones used by your original EKS nodegroup):

- Private Subnets
- Cluster SecurityGroup

```bash
# List all resources with the tag: 'karpenter.sh/discovery=${CLUSTER_NAME}'
aws resourcegroupstaggingapi get-resources \
--tag-filters "Key=karpenter.sh/discovery,Values=${CLUSTER_NAME}" \
--query 'ResourceTagMappingList[]' --output text \
| sed 's/^arn:/\n----------\narn:/g'
```



```bash title="Check Karpenter logs"
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter
```

- If the Karpenter controller pods cannot reach the cluster or AWS endpoints, you may need to set `hostNetwork: true` in the Helm chart values.

```bash title="Check the Spot interruption SQS queue"
aws sqs get-queue-attributes \
--queue-url "https://sqs.${AWS_REGION}.amazonaws.com/${ACCOUNT_ID}/${CLUSTER_NAME}" \
--attribute-names ApproximateNumberOfMessages --no-cli-pager --query 'Attributes'
```
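Note that `${ACCOUNT_ID}` is not exported in the env-variable block earlier in this demo. A sketch of deriving it and the queue URL (the `aws sts` line is shown commented out so the snippet also works offline; `111122223333` is a placeholder account ID):

```shell
# export ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
ACCOUNT_ID="${ACCOUNT_ID:-111122223333}"   # placeholder account ID
AWS_REGION="${AWS_REGION:-eu-central-1}"
CLUSTER_NAME="${CLUSTER_NAME:-bambi}"
# The interruption queue created by eksctl is named after the cluster
QUEUE_URL="https://sqs.${AWS_REGION}.amazonaws.com/${ACCOUNT_ID}/${CLUSTER_NAME}"
echo "$QUEUE_URL"
```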

### Scale up the cluster

```bash title="Scale up the cluster"
# A sketch following the pattern in the Karpenter getting-started guide:
# an "inflate" pause deployment whose unschedulable replicas trigger Karpenter to provision nodes.
kubectl create deployment inflate \
  --image=public.ecr.aws/eks-distro/kubernetes/pause:3.7 --replicas=0
kubectl set resources deployment inflate --requests=cpu=1
kubectl scale deployment inflate --replicas=5
```
3 changes: 3 additions & 0 deletions errors.md
@@ -0,0 +1,3 @@
{"level":"DEBUG","time":"2023-11-04T14:48:06.726Z","logger":"controller.provisioner","message":"ignoring pod, configured to not run on a Karpenter provisioned node via karpenter.sh/provisioner-name DoesNotExist and karpenter.sh/nodepool DoesNotExist requirements","commit":"1072d3b","pod":"karpenter/karpenter-5b54f6d889-xfnfm"}
{"level":"ERROR","time":"2023-11-04T14:48:06.920Z","logger":"controller.interruption","message":"getting messages from queue, discovering queue url, fetching queue url, AWS.SimpleQueueService.NonExistentQueue: The specified queue does not exist for this wsdl version.\n\tstatus code: 400, request id: 88c274b8-db81-530d-89d2-297880676fbb","commit":"1072d3b"}
{"level":"ERROR","time":"2023-11-04T14:48:07.189Z","logger":"controller.pricing","message":"retreiving on-demand pricing data, AccessDeniedException: User: arn:aws:sts::995194808144:assumed-role/eksctl-baby-nodegroup-main-NodeInstanceRole-XImhCLNcSot2/i-0d47b79a18b89479f is not authorized to perform: pricing:GetProducts because no identity-based policy allows the pricing:GetProducts action; AccessDeniedException: User: arn:aws:sts::995194808144:assumed-role/eksctl-baby-nodegroup-main-NodeInstanceRole-XImhCLNcSot2/i-0d47b79a18b89479f is not authorized to perform: pricing:GetProducts because no identity-based policy allows the pricing:GetProducts action; retrieving spot pricing data, UnauthorizedOperation: You are not authorized to perform this operation. User: arn:aws:sts::995194808144:assumed-role/eksctl-baby-nodegroup-main-NodeInstanceRole-XImhCLNcSot2/i-0d47b79a18b89479f is not authorized to perform: ec2:DescribeSpotPriceHistory because no identity-based policy allows the ec2:DescribeSpotPriceHistory action\n\tstatus code: 403, request id: f2c9f7f2-b89a-41d2-8e18-f317893603a2","commit":"1072d3b"}
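The `AccessDeniedException` / `UnauthorizedOperation` errors above indicate that the role Karpenter runs under is missing pricing-related read permissions. A minimal sketch of the missing IAM statement, using only the actions named in the log (the real Karpenter controller policy is broader):

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "KarpenterPricingRead",
      "Effect": "Allow",
      "Action": [
        "pricing:GetProducts",
        "ec2:DescribeSpotPriceHistory"
      ],
      "Resource": "*"
    }
  ]
}
```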
22 changes: 22 additions & 0 deletions examples/README.md
@@ -0,0 +1,22 @@
# Karpenter v1beta Configuration Examples


## Index
-


```bash
export CLUSTER_NAME="bambi"

cd examples/v1beta

# render a template to stdout to review it first
envsubst < provisioner/spot.yaml

# render and apply the v1beta examples
envsubst < node-pool.yaml | kubectl apply -f -
envsubst < ec2-node-class.yaml | kubectl apply -f -
```
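`envsubst` (from GNU gettext) replaces `${VAR}` references in the templates with exported values before they are piped to `kubectl`. A minimal local illustration of the same substitution using `sed`, in case `envsubst` is not installed:

```shell
export CLUSTER_NAME="bambi"
# A one-line stand-in for the templated YAML files
template='karpenter.sh/discovery: "${CLUSTER_NAME}"'
# Replace the literal ${CLUSTER_NAME} token with the variable's value
rendered=$(printf '%s\n' "$template" | sed "s/\${CLUSTER_NAME}/${CLUSTER_NAME}/")
echo "$rendered"
```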


44 changes: 44 additions & 0 deletions examples/v1beta/ec2-node-class.yaml
@@ -0,0 +1,44 @@
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
  name: default
spec:
  # required, resolves a default ami and userdata
  amiFamily: AL2

  # required, discovers subnets to attach to instances
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: "${CLUSTER_NAME}"

  # required, discovers security groups to attach to instances
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: "${CLUSTER_NAME}"

  # required, IAM role to use for the node identity
  role: "KarpenterNodeRole-${CLUSTER_NAME}"

  # optional, discovers amis to override the amiFamily's default
  amiSelectorTerms:
    - tags:
        karpenter.sh/discovery: "${CLUSTER_NAME}"

  # optional, overrides autogenerated userdata with a merge semantic
  userData: |
    #!/bin/bash
    echo "Userdata script: $(date)"
    echo "Installing SSM Agent"
    sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
    sudo systemctl status amazon-ssm-agent

  # optional, propagates tags to underlying EC2 resources
  tags:
    team: team-a
    app: team-a-app

  # optional, configures storage devices for the instance
  blockDeviceMappings:
    - deviceName: /dev/xvda
      ebs:
        volumeSize: 60Gi
        volumeType: gp3

  # optional, configures detailed monitoring for the instance
  # detailedMonitoring: true
126 changes: 126 additions & 0 deletions examples/v1beta/node-pool.yaml
@@ -0,0 +1,126 @@
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: default
spec:
  # Template section that describes how to template out NodeClaim resources that Karpenter will provision
  # Karpenter will consider this template to be the minimum requirements needed to provision a Node using this NodePool
  # It will overlay this NodePool with Pods that need to schedule to further constrain the NodeClaims
  # Karpenter will provision to launch new Nodes for the cluster
  template:
    metadata:
      # Labels are arbitrary key-values that are applied to all nodes
      labels:
        billing-team: my-team

      # Annotations are arbitrary key-values that are applied to all nodes
      annotations:
        example.com/owner: "my-team"
    spec:
      # References the Cloud Provider's NodeClass resource, see your cloud provider specific documentation
      nodeClassRef:
        apiVersion: karpenter.k8s.aws/v1beta1
        kind: EC2NodeClass
        name: default

      # Provisioned nodes will have these taints
      # Taints may prevent pods from scheduling if they are not tolerated by the pod.
      # taints:
      #   - key: example.com/special-taint
      #     effect: NoSchedule

      # Provisioned nodes will have these taints, but pods do not need to tolerate these taints to be provisioned by this
      # NodePool. These taints are expected to be temporary and some other entity (e.g. a DaemonSet) is responsible for
      # removing the taint after it has finished initializing the node.
      # startupTaints:
      #   - key: example.com/another-taint
      #     effect: NoSchedule

      # Requirements that constrain the parameters of provisioned nodes.
      # These requirements are combined with pod.spec.topologySpreadConstraints, pod.spec.affinity.nodeAffinity, pod.spec.affinity.podAffinity, and pod.spec.nodeSelector rules.
      # Operators { In, NotIn, Exists, DoesNotExist, Gt, and Lt } are supported.
      # https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#operators
      requirements:
        - key: "karpenter.k8s.aws/instance-category"
          operator: In
          values: ["c", "m", "r"]
        - key: "karpenter.k8s.aws/instance-cpu"
          operator: In
          values: ["4", "8", "16", "32"]
        - key: "karpenter.k8s.aws/instance-hypervisor"
          operator: In
          values: ["nitro"]
        - key: "karpenter.k8s.aws/instance-generation"
          operator: Gt
          values: ["2"]
        # - key: "topology.kubernetes.io/zone"
        #   operator: In
        #   values: ["us-west-2a", "us-west-2b"]
        - key: "kubernetes.io/arch"
          operator: In
          values: ["arm64", "amd64"]
        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
          operator: In
          values: ["spot", "on-demand"]

      # Karpenter provides the ability to specify a few additional Kubelet args.
      # These are all optional and provide support for additional customization and use cases.
      kubelet:
        # clusterDNS: ["10.0.1.100"]
        # containerRuntime: containerd
        systemReserved:
          cpu: 100m
          memory: 100Mi
          ephemeral-storage: 1Gi
        kubeReserved:
          cpu: 200m
          memory: 100Mi
          ephemeral-storage: 3Gi
        # evictionHard:
        #   memory.available: 5%
        #   nodefs.available: 10%
        #   nodefs.inodesFree: 10%
        # evictionSoft:
        #   memory.available: 500Mi
        #   nodefs.available: 15%
        #   nodefs.inodesFree: 15%
        # evictionSoftGracePeriod:
        #   memory.available: 1m
        #   nodefs.available: 1m30s
        #   nodefs.inodesFree: 2m
        # evictionMaxPodGracePeriod: 60
        # imageGCHighThresholdPercent: 85
        # imageGCLowThresholdPercent: 80
        # cpuCFSQuota: true
        # podsPerCore: 2
        # maxPods: 20

  # Disruption section which describes the ways in which Karpenter can disrupt and replace Nodes
  # Configuration in this section constrains how aggressive Karpenter can be with performing operations
  # like rolling Nodes due to them hitting their maximum lifetime (expiry) or scaling down nodes to reduce cluster cost
  disruption:
    # Describes which types of Nodes Karpenter should consider for consolidation
    # If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost
    # If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods
    consolidationPolicy: WhenUnderutilized

    # The amount of time Karpenter should wait after discovering a consolidation decision
    # This value can currently only be set when the consolidationPolicy is 'WhenEmpty'
    # You can choose to disable consolidation entirely by setting the string value 'Never' here
    # consolidateAfter: 60s

    # The amount of time a Node can live on the cluster before being removed
    # Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes
    # You can choose to disable expiration entirely by setting the string value 'Never' here
    expireAfter: 720h

  # Resource limits constrain the total size of the cluster.
  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
  limits:
    cpu: "100"
    memory: 200Gi

  # Priority given to the NodePool when the scheduler considers which NodePool
  # to select. Higher weights indicate higher priority when comparing NodePools.
  # Specifying no weight is equivalent to specifying a weight of 0.
  weight: 10
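Pods can narrow these requirements further with their own scheduling constraints. For example, a hypothetical pod that pins itself to Spot amd64 capacity from this NodePool (pod name and image are illustrative):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: spot-only-example   # hypothetical example pod
spec:
  nodeSelector:
    karpenter.sh/capacity-type: "spot"
    kubernetes.io/arch: "amd64"
  containers:
    - name: app
      image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
      resources:
        requests:
          cpu: "1"
```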
