build: initial commit

biznesbees · May 9, 2024 · 45de52f · 45de52f
1 parent 5fc3983
commit 45de52f
Show file tree

Hide file tree

Showing 7 changed files with 260 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,6 @@
+# Ignore everything
+**
+
+# But include
+!gemma.py
+!requirements.txt
diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
@@ -0,0 +1,64 @@
+name: 🚀 Publish
+
+# Builds the multi-arch Docker image and publishes a GitHub release whenever a
+# `gemma-wrapper-v*` tag is pushed.
+on:
+  push:
+    tags:
+      - 'gemma-wrapper-v*'
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write # Required for creating the GitHub release
+      packages: write # Required for pushing to GitHub Container Registry
+    steps:
+    - name: ⬇️ Check out code
+      uses: actions/checkout@v3
+    - name: Version
+      id: version
+      # Derive the image/release version from the tag by stripping the
+      # `gemma-wrapper-` prefix; fall back to the commit SHA for non-tag refs.
+      # Outputs are written to $GITHUB_OUTPUT because the `::set-output`
+      # workflow command is deprecated.
+      run: |
+        if [[ $GITHUB_REF == refs/tags/* ]]
+        then
+            tag=${GITHUB_REF##refs/tags/}
+            v=${tag##gemma-wrapper-}
+            echo "version=$v" >> "$GITHUB_OUTPUT"
+        else
+            echo "version=$GITHUB_SHA" >> "$GITHUB_OUTPUT"
+        fi
+    - name: Create Release
+      id: create_release
+      uses: actions/create-release@v1
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      with:
+        tag_name: ${{ github.ref }}
+        release_name: Gemma Wrapper ${{ steps.version.outputs.version }}
+        draft: false
+        prerelease: false
+    # QEMU emulation lets Buildx build the non-native platform listed below.
+    - name: Set up QEMU
+      uses: docker/setup-qemu-action@v3
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+    - name: Login to GitHub Container Registry
+      uses: docker/login-action@v1
+      with:
+        registry: ghcr.io
+        username: ${{ github.repository_owner }}
+        password: ${{ secrets.PAT }} # `PAT` is a secret that contains your Personal Access Token with `write:packages` scope
+    - name: Build and push image
+      id: build-and-push
+      uses: docker/build-push-action@v5
+      with:
+        context: .
+        file: Dockerfile
+        platforms: linux/amd64,linux/arm64/v8
+        push: true
+        tags: |
+          ghcr.io/biznesbees/gemma-wrapper:${{ steps.version.outputs.version }}
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+venv
+__pycache__
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,22 @@
+FROM python:3.11.9-slim
+
+WORKDIR /app
+
+# Compiler toolchain, presumably for dependencies that build native code.
+# update + install + list cleanup share one layer so the apt package lists
+# are not kept in the image.
+RUN apt-get update \
+    && apt-get install -y gcc python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies before copying the application code so that
+# editing gemma.py does not invalidate the cached dependency layer.
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /app/
+
+# Port uvicorn listens on (see CMD below).
+EXPOSE 8080
+
+CMD ["uvicorn", "gemma:app", "--host", "0.0.0.0", "--port", "8080"]
diff --git a/deployment/deployment.yaml b/deployment/deployment.yaml
@@ -0,0 +1,69 @@
+---
+# Source: onechart/templates/service.yaml
+# ClusterIP Service that forwards port 80 to the named "http" port of the gemma pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: gemma
+  namespace: default
+  labels:
+    helm.sh/chart: onechart-0.66.0
+    app.kubernetes.io/name: onechart
+    app.kubernetes.io/instance: gemma
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: http # refers to the container port named "http" in the Deployment below
+      protocol: TCP
+      name: http
+  selector:
+    app.kubernetes.io/name: onechart
+    app.kubernetes.io/instance: gemma
+---
+# Source: onechart/templates/deployment.yaml
+# Single-replica Deployment selected by the Service above via the name/instance labels.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gemma
+  namespace: default
+  labels:
+    helm.sh/chart: onechart-0.66.0
+    app.kubernetes.io/name: onechart
+    app.kubernetes.io/instance: gemma
+    app.kubernetes.io/managed-by: Helm
+  annotations:
+    kubectl.kubernetes.io/default-container: gemma
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: onechart
+      app.kubernetes.io/instance: gemma
+  template:
+    metadata:
+      annotations:
+        checksum/config: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+      labels:
+        app.kubernetes.io/name: onechart
+        app.kubernetes.io/instance: gemma
+    spec:
+      containers:
+      - image: nginx:latest # NOTE(review): stock nginx, not the ghcr.io/biznesbees/gemma-wrapper image the pipeline publishes — confirm
+        imagePullPolicy: IfNotPresent
+        name: gemma
+        ports:
+        - containerPort: 80 # NOTE(review): the Dockerfile starts uvicorn on port 8080 — confirm before switching the image
+          name: http
+          protocol: TCP
+        resources: # requests equal limits for both CPU and memory
+          limits:
+            cpu: 200m
+            memory: 200Mi # NOTE(review): likely too small to load a 2B-parameter model — confirm sizing
+          requests:
+            cpu: 200m
+            memory: 200Mi
+        securityContext: {}
+      initContainers: null
+      securityContext:
+        fsGroup: 999
diff --git a/gemma.py b/gemma.py
@@ -0,0 +1,61 @@
+"""FastAPI wrapper exposing text generation with the google/gemma-2b model."""
+import os
+
+import torch
+from fastapi import FastAPI, HTTPException
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+MODEL_ID = "google/gemma-2b"
+
+# Hugging Face token forwarded to `from_pretrained`; None when the variable
+# is unset.
+ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
+
+app = FastAPI()
+
+# The tokenizer and the model are loaded once, when this module is imported.
+# NOTE(review): force_download=True is kept from the original; it presumably
+# re-fetches the tokenizer files on every start — confirm it is intended.
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_ID,
+    token=ACCESS_TOKEN,
+    force_download=True,
+)
+
+# Weight placement is delegated to device_map="auto".
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    token=ACCESS_TOKEN,
+)
+
+
+@app.post("/generate/")
+async def generate_text(data: dict):
+    """Generate a continuation for the prompt in the request body.
+
+    Args:
+        data: JSON object with the prompt under the "input_text" key.
+
+    Returns:
+        A dict with the decoded model output under "generated_text".
+
+    Raises:
+        HTTPException: 422 if "input_text" is missing or not a string.
+    """
+    input_text = data.get("input_text")
+    if not isinstance(input_text, str):
+        raise HTTPException(
+            status_code=422,
+            detail="'input_text' is required and must be a string",
+        )
+    # Move the inputs to wherever the model was placed instead of assuming a
+    # GPU: with device_map="auto" the model may be on CPU, and a hard-coded
+    # "cuda" then fails on hosts without a GPU.
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+    # NOTE: generate() is synchronous, so it occupies the event loop for the
+    # duration of the call.
+    outputs = model.generate(**inputs)
+    generated_text = tokenizer.decode(outputs[0])
+    return {"generated_text": generated_text}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,83 @@
+accelerate==0.30.0
+annotated-types==0.6.0
+anyio==4.3.0
+asttokens==2.4.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+decorator==5.1.1
+dnspython==2.6.1
+email_validator==2.1.1
+exceptiongroup==1.2.1
+executing==2.0.1
+fastapi==0.111.0
+fastapi-cli==0.0.2
+filelock==3.14.0
+fsspec==2024.3.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.0
+idna==3.7
+ipython==8.24.0
+jedi==0.19.1
+Jinja2==3.1.3
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+orjson==3.10.3
+packaging==24.0
+parso==0.8.4
+pexpect==4.9.0
+prompt-toolkit==3.0.43
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pydantic==2.7.1
+pydantic_core==2.18.2
+Pygments==2.18.0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+PyYAML==6.0.1
+regex==2024.4.28
+requests==2.31.0
+rich==13.7.1
+safetensors==0.4.3
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+stack-data==0.6.3
+starlette==0.37.2
+sympy==1.12
+tokenizers==0.19.1
+torch==2.3.0
+tqdm==4.66.4
+traitlets==5.14.3
+transformers==4.40.1
+triton==2.3.0
+typer==0.12.3
+typing_extensions==4.11.0
+ujson==5.9.0
+urllib3==2.2.1
+uvicorn==0.29.0
+uvloop==0.19.0
+watchfiles==0.21.0
+wcwidth==0.2.13
+websockets==12.0