Skip to content

Commit

Permalink
feat: Add Deployments for E2E Tests (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
ishaansehgal99 committed Oct 13, 2023
1 parent 64daef4 commit 4801723
Show file tree
Hide file tree
Showing 36 changed files with 460 additions and 267 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ hack/tools/bin/*

# presets

pkg/presets/llama-2/weights
pkg/presets/llama-2-chat/weights
presets/llama-2/weights
presets/llama-2-chat/weights
presets/falcon/weights
4 changes: 2 additions & 2 deletions docker/presets/falcon/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ WORKDIR /workspace/huggingface
# First, copy just the requirements.txt file and install dependencies
# This is done before copying the code to utilize Docker's layer caching and
# avoid reinstalling dependencies unless the requirements file changes.
COPY pkg/presets/falcon/requirements.txt ./requirements.txt
COPY presets/falcon/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the entire 'presets/falcon' folder to the working directory
COPY pkg/presets/falcon .
COPY presets/falcon .
2 changes: 1 addition & 1 deletion docker/presets/llama-2-chat/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ RUN pip install -e .
RUN pip install fastapi pydantic
RUN pip install 'uvicorn[standard]'

ADD pkg/presets/llama-2-chat /workspace/llama/llama-2-chat
ADD presets/llama-2-chat /workspace/llama/llama-2-chat
2 changes: 1 addition & 1 deletion docker/presets/llama-2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ RUN pip install -e .
RUN pip install fastapi pydantic
RUN pip install 'uvicorn[standard]'

ADD pkg/presets/llama-2 /workspace/llama/llama-2
ADD presets/llama-2 /workspace/llama/llama-2
File renamed without changes.
File renamed without changes.
72 changes: 0 additions & 72 deletions pkg/presets/convert/llama-2-13b-chat.yaml

This file was deleted.

63 changes: 0 additions & 63 deletions pkg/presets/convert/llama-2-7b-chat.yaml

This file was deleted.

61 changes: 0 additions & 61 deletions pkg/presets/convert/llama-2-7b.yaml

This file was deleted.

37 changes: 0 additions & 37 deletions pkg/presets/convert/pod-2GPU.yaml

This file was deleted.

28 changes: 0 additions & 28 deletions pkg/presets/convert/pod.yaml

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 14 additions & 0 deletions presets/k8s/falcon-7b-instruct/falcon-7b-instruct-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Service exposing the falcon-7b-instruct StatefulSet's pod 0 on an external
# LoadBalancer IP, forwarding port 80 to the inference server's port 5000.
apiVersion: v1
kind: Service
metadata:
  name: falcon-7b-instruct
spec:
  selector:
    app: falcon
    # Pin the Service to the single pod created by the StatefulSet.
    statefulset.kubernetes.io/pod-name: falcon-7b-instruct-0
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: LoadBalancer
  # Publish the endpoint before the readiness probe passes — the readiness
  # probe in the matching StatefulSet allows a slow startup, and e2e tooling
  # presumably needs the address early. TODO(review): confirm this is intended.
  publishNotReadyAddresses: true
51 changes: 51 additions & 0 deletions presets/k8s/falcon-7b-instruct/falcon-7b-instruct-statefulset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# StatefulSet running a single replica of the falcon-7b-instruct inference
# server. Paired with the falcon-7b-instruct Service, which selects pod 0 by
# its statefulset pod-name label.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: falcon-7b-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      app: falcon
  podManagementPolicy: Parallel
  template:
    metadata:
      labels:
        app: falcon
    spec:
      containers:
        - name: falcon-container
          # REPO_HERE / TAG_HERE are placeholders substituted before apply.
          image: REPO_HERE.azurecr.io/falcon-7b-instruct:TAG_HERE
          command:
            - /bin/sh
            - -c
            - accelerate launch --config_file config.yaml --num_processes 1 --num_machines 1 --use_deepspeed --machine_rank 0 --gpu_ids all inference-api.py
          # NOTE(review): no resources.limits entry for nvidia.com/gpu is
          # declared even though the pod tolerates GPU taints and launches
          # with --gpu_ids all — confirm the node pool exposes GPUs to the
          # container without an explicit device-plugin resource request.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600 # 10 Min
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
      volumes:
        # Memory-backed emptyDir mounted at /dev/shm for the container above.
        - name: dshm
          emptyDir:
            medium: Memory
      tolerations:
        - effect: NoSchedule
          key: sku
          operator: Equal
          value: gpu
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      nodeSelector:
        # presumably the GPU node-pool name — verify against cluster setup
        pool: on7binstruct
15 changes: 15 additions & 0 deletions presets/k8s/falcon-7b/falcon-7b-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# Service exposing the falcon-7b StatefulSet's pod 0 on an external
# LoadBalancer IP, forwarding port 80 to the inference server's port 5000.
apiVersion: v1
kind: Service
metadata:
  name: falcon-7b
spec:
  selector:
    app: falcon
    # Pin the Service to the single pod created by the StatefulSet.
    statefulset.kubernetes.io/pod-name: falcon-7b-0
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: LoadBalancer
  # Publish the endpoint before the readiness probe passes — presumably so
  # e2e tooling can obtain the address during slow model startup.
  # TODO(review): confirm this is intended.
  publishNotReadyAddresses: true

Loading

0 comments on commit 4801723

Please sign in to comment.