Skip to content

Commit

Permalink
build: initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
YoucefGuichi committed May 9, 2024
1 parent 5fc3983 commit 45de52f
Show file tree
Hide file tree
Showing 7 changed files with 260 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore everything
**

# ...except the files the image build actually needs
!gemma.py
!requirements.txt
57 changes: 57 additions & 0 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Publishes a GitHub release and a multi-arch container image whenever a
# `gemma-wrapper-v*` tag is pushed.
name: 🚀 Publish

on:
  push:
    tags:
      - 'gemma-wrapper-v*'

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write # Required for creating the GitHub release
      packages: write # Required for pushing to GitHub Container Registry
    steps:
      - name: ⬇️ Check out code
        uses: actions/checkout@v3
      - name: Version
        id: version
        # Derives the image/release version from the tag by stripping the
        # `gemma-wrapper-` prefix; falls back to the commit SHA otherwise.
        # Outputs are written to $GITHUB_OUTPUT because the `::set-output`
        # workflow command is deprecated.
        run: |
          if [[ $GITHUB_REF == refs/tags/* ]]
          then
            tag=${GITHUB_REF##refs/tags/}
            v=${tag##gemma-wrapper-}
            echo "version=$v" >> "$GITHUB_OUTPUT"
          else
            echo "version=$GITHUB_SHA" >> "$GITHUB_OUTPUT"
          fi
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Gemma Wrapper ${{ steps.version.outputs.version }}
          draft: false
          prerelease: false
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.PAT }} # `PAT` is a secret that contains your Personal Access Token with `write:packages` scope
      - name: Build and push image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile
          # NOTE(review): requirements.txt pins CUDA 12 (`nvidia-*-cu12`) and
          # triton wheels; confirm they are published for linux/arm64 before
          # relying on that platform.
          platforms: linux/amd64,linux/arm64/v8
          push: true
          tags: |
            ghcr.io/biznesbees/gemma-wrapper:${{ steps.version.outputs.version }}
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
venv
__pycache__
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
FROM python:3.11.9-slim

WORKDIR /app

# Compiler and Python headers, presumably for dependencies that have to build
# from source. The apt package lists are removed in the same layer so they do
# not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends gcc python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the application code so this layer is
# rebuilt only when requirements.txt changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# .dockerignore limits the build context to gemma.py and requirements.txt.
COPY . /app/

EXPOSE 8080

CMD ["uvicorn", "gemma:app", "--host", "0.0.0.0", "--port", "8080"]
69 changes: 69 additions & 0 deletions deployment/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
# Source: onechart/templates/service.yaml
# ClusterIP Service that forwards port 80 to the pod port named `http`.
apiVersion: v1
kind: Service
metadata:
  name: gemma
  namespace: default
  labels:
    helm.sh/chart: onechart-0.66.0
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: http
      protocol: TCP
      name: http
  selector:
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
---
# Source: onechart/templates/deployment.yaml
# Single-replica Deployment selected by the Service above.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gemma
  namespace: default
  labels:
    helm.sh/chart: onechart-0.66.0
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
    app.kubernetes.io/managed-by: Helm
  annotations:
    kubectl.kubernetes.io/default-container: gemma
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: onechart
      app.kubernetes.io/instance: gemma
  template:
    metadata:
      annotations:
        checksum/config: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
      labels:
        app.kubernetes.io/name: onechart
        app.kubernetes.io/instance: gemma
    spec:
      containers:
        # NOTE(review): this runs nginx:latest on port 80, while the
        # Dockerfile in this repository starts uvicorn on port 8080 and the
        # pipeline publishes ghcr.io/biznesbees/gemma-wrapper. Presumably
        # still the chart's placeholder values; confirm before deploying.
        - image: nginx:latest
          imagePullPolicy: IfNotPresent
          name: gemma
          ports:
            - containerPort: 80
              name: http
              protocol: TCP
          # NOTE(review): 200Mi is likely too small for a process that loads
          # google/gemma-2b; verify against real memory usage.
          resources:
            limits:
              cpu: 200m
              memory: 200Mi
            requests:
              cpu: 200m
              memory: 200Mi
          securityContext: {}
      initContainers: null
      securityContext:
        fsGroup: 999
29 changes: 29 additions & 0 deletions gemma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os

import torch
from fastapi import FastAPI, HTTPException
from transformers import AutoTokenizer, AutoModelForCausalLM

# Token handed to both `from_pretrained` calls below; None when the
# ACCESS_TOKEN environment variable is unset.
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Checkpoint shared by the tokenizer and the model.
_MODEL_ID = "google/gemma-2b"

app = FastAPI()

# Both objects are created at import time, so the service does not start
# serving until loading has finished.
# NOTE(review): force_download=True is set for the tokenizer only; presumably
# this re-fetches the tokenizer files on every start -- confirm it is intended.
tokenizer = AutoTokenizer.from_pretrained(
    _MODEL_ID, token=ACCESS_TOKEN, force_download=True
)

# Weights are loaded as bfloat16; device_map="auto" leaves device placement
# to the library.
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    token=ACCESS_TOKEN,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


@app.post("/generate/")
async def generate_text(data: dict):
    """Generate text from ``data["input_text"]`` with the module-level model.

    Args:
        data: Parsed JSON request body. Must contain a string under the key
            ``"input_text"``.

    Returns:
        A dict with the single key ``"generated_text"`` holding the decoded
        first sequence returned by ``model.generate``.

    Raises:
        HTTPException: Status 422 when ``"input_text"`` is missing or is not
            a string.
    """
    input_text = data.get("input_text")
    if not isinstance(input_text, str):
        # Reject bad payloads explicitly instead of letting the tokenizer
        # fail and surface as an opaque 500.
        raise HTTPException(
            status_code=422,
            detail="'input_text' must be a string",
        )

    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on CUDA; send the inputs to the device the model actually uses.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**encoded)
    generated_text = tokenizer.decode(outputs[0])
    return {"generated_text": generated_text}
83 changes: 83 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
accelerate==0.30.0
annotated-types==0.6.0
anyio==4.3.0
asttokens==2.4.1
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
decorator==5.1.1
dnspython==2.6.1
email_validator==2.1.1
exceptiongroup==1.2.1
executing==2.0.1
fastapi==0.111.0
fastapi-cli==0.0.2
filelock==3.14.0
fsspec==2024.3.1
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.23.0
idna==3.7
ipython==8.24.0
jedi==0.19.1
Jinja2==3.1.3
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
mdurl==0.1.2
mpmath==1.3.0
networkx==3.3
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.1.105
orjson==3.10.3
packaging==24.0
parso==0.8.4
pexpect==4.9.0
prompt-toolkit==3.0.43
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
pydantic==2.7.1
pydantic_core==2.18.2
Pygments==2.18.0
python-dotenv==1.0.1
python-multipart==0.0.9
PyYAML==6.0.1
regex==2024.4.28
requests==2.31.0
rich==13.7.1
safetensors==0.4.3
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
stack-data==0.6.3
starlette==0.37.2
sympy==1.12
tokenizers==0.19.1
torch==2.3.0
tqdm==4.66.4
traitlets==5.14.3
transformers==4.40.1
triton==2.3.0
typer==0.12.3
typing_extensions==4.11.0
ujson==5.9.0
urllib3==2.2.1
uvicorn==0.29.0
uvloop==0.19.0
watchfiles==0.21.0
wcwidth==0.2.13
websockets==12.0

0 comments on commit 45de52f

Please sign in to comment.