-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5fc3983
commit 45de52f
Showing
7 changed files
with
260 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Allow-list style ignore file: start by excluding the whole build context...
**

# ...then re-admit only the files the image build actually uses.
!gemma.py
!requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Creates a GitHub release and pushes a multi-arch container image to GHCR
# whenever a tag matching `gemma-wrapper-v*` is pushed.
name: 🚀 Publish

on:
  push:
    tags:
      - 'gemma-wrapper-v*'

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required for creating the GitHub release
      packages: write  # Required for pushing to GitHub Container Registry
    steps:
      - name: ⬇️ Check out code
        uses: actions/checkout@v4
      # Derives the image/release version from the tag by stripping the
      # `gemma-wrapper-` prefix; falls back to the commit SHA for non-tag refs.
      - name: Version
        id: version
        run: |
          if [[ $GITHUB_REF == refs/tags/* ]]
          then
            tag=${GITHUB_REF##refs/tags/}
            v=${tag##gemma-wrapper-}
            # Step outputs are written to the $GITHUB_OUTPUT file; the
            # `::set-output` workflow command is deprecated.
            echo "version=$v" >> "$GITHUB_OUTPUT"
          else
            echo "version=$GITHUB_SHA" >> "$GITHUB_OUTPUT"
          fi
      # NOTE(review): actions/create-release is no longer maintained upstream;
      # consider migrating to `gh release create` - confirm before changing.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Gemma Wrapper ${{ steps.version.outputs.version }}
          draft: false
          prerelease: false
      # NOTE(review): the docker/* actions below are pinned to a major version;
      # confirm these pins match the versions the project intends to use.
      # QEMU provides emulation so the arm64 image can be built on an amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          # `PAT` is a secret that contains your Personal Access Token with
          # `write:packages` scope
          password: ${{ secrets.PAT }}
      - name: Build and push image
        id: build-and-push
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile
          platforms: linux/amd64,linux/arm64/v8
          push: true
          tags: |
            ghcr.io/biznesbees/gemma-wrapper:${{ steps.version.outputs.version }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Local virtual environment directory (conventional name)
venv
# Python bytecode cache directories
__pycache__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Image for the FastAPI app defined in gemma.py, served by uvicorn on port 8080.
FROM python:3.11.9-slim

WORKDIR /app

# Compiler and Python headers for any dependency that builds a native
# extension during `pip install`. The apt package lists are removed in the
# same layer so they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y gcc python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the application source so this
# (large, slow) layer is reused from cache when only source files change.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app/

EXPOSE 8080

CMD ["uvicorn", "gemma:app", "--host", "0.0.0.0", "--port", "8080"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
---
# Source: onechart/templates/service.yaml
# ClusterIP Service exposing port 80 and forwarding to the pods' named
# `http` port; pods are selected by the onechart name/instance labels.
apiVersion: v1
kind: Service
metadata:
  name: gemma
  namespace: default
  labels:
    helm.sh/chart: onechart-0.66.0
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: http
      protocol: TCP
      name: http
  selector:
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
---
# Source: onechart/templates/deployment.yaml
# Single-replica Deployment whose pod template carries the labels that the
# `gemma` Service selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gemma
  namespace: default
  labels:
    helm.sh/chart: onechart-0.66.0
    app.kubernetes.io/name: onechart
    app.kubernetes.io/instance: gemma
    app.kubernetes.io/managed-by: Helm
  annotations:
    kubectl.kubernetes.io/default-container: gemma
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: onechart
      app.kubernetes.io/instance: gemma
  template:
    metadata:
      annotations:
        checksum/config: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
      labels:
        app.kubernetes.io/name: onechart
        app.kubernetes.io/instance: gemma
    spec:
      containers:
        # NOTE(review): this runs nginx:latest on port 80, while the Dockerfile
        # in this commit serves uvicorn on 8080 - presumably a placeholder;
        # confirm the intended image and containerPort.
        - image: nginx:latest
          imagePullPolicy: IfNotPresent
          name: gemma
          ports:
            - containerPort: 80
              name: http
              protocol: TCP
          # NOTE(review): 200Mi is likely too small for a 2B-parameter model
          # once the real image is used - confirm.
          resources:
            limits:
              cpu: 200m
              memory: 200Mi
            requests:
              cpu: 200m
              memory: 200Mi
          securityContext: {}
      initContainers: null
      securityContext:
        fsGroup: 999
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from fastapi import FastAPI | ||
from transformers import AutoTokenizer, AutoModelForCausalLM | ||
import torch | ||
import os | ||
|
||
# Token passed to `from_pretrained` for Hub authentication; None when the
# ACCESS_TOKEN environment variable is unset.
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Single source of truth for the Hub repository id, so the tokenizer and the
# model can never be loaded from different checkpoints by accident.
MODEL_ID = "google/gemma-2b"

app = FastAPI()

# Tokenizer and model are loaded once, at import time, and shared by all
# requests. `force_download` is intentionally not set: forcing a fresh
# download on every process start slows start-up and defeats the local cache.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=ACCESS_TOKEN)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # let the loader decide where the weights are placed
    token=ACCESS_TOKEN,
)
|
||
|
||
@app.post("/generate/")
async def generate_text(data: dict):
    """Generate a continuation for the prompt given in the request body.

    Args:
        data: Parsed JSON request body. The prompt is read from its
            ``"input_text"`` key.

    Returns:
        A dict with a single ``"generated_text"`` key holding the decoded
        first sequence returned by ``model.generate``.

    Raises:
        HTTPException: 422 when ``"input_text"`` is missing or null.
    """
    # Imported locally so this handler does not depend on a change to the
    # module-level import block.
    from fastapi import HTTPException

    input_text = data.get("input_text")
    if input_text is None:
        # Fail with a client error instead of an unhandled tokenizer exception.
        raise HTTPException(status_code=422, detail="'input_text' is required")

    # Place the inputs on the device the model was actually loaded on. The
    # model is loaded with device_map="auto", so hard-coding "cuda" would
    # crash on hosts without a CUDA device.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)

    # NOTE(review): generate() is a blocking call inside an async handler and
    # will presumably stall the event loop for its duration - confirm whether
    # that is acceptable for the expected traffic.
    outputs = model.generate(**encoded)
    generated_text = tokenizer.decode(outputs[0])
    return {"generated_text": generated_text}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
accelerate==0.30.0 | ||
annotated-types==0.6.0 | ||
anyio==4.3.0 | ||
asttokens==2.4.1 | ||
certifi==2024.2.2 | ||
charset-normalizer==3.3.2 | ||
click==8.1.7 | ||
decorator==5.1.1 | ||
dnspython==2.6.1 | ||
email_validator==2.1.1 | ||
exceptiongroup==1.2.1 | ||
executing==2.0.1 | ||
fastapi==0.111.0 | ||
fastapi-cli==0.0.2 | ||
filelock==3.14.0 | ||
fsspec==2024.3.1 | ||
h11==0.14.0 | ||
httpcore==1.0.5 | ||
httptools==0.6.1 | ||
httpx==0.27.0 | ||
huggingface-hub==0.23.0 | ||
idna==3.7 | ||
ipython==8.24.0 | ||
jedi==0.19.1 | ||
Jinja2==3.1.3 | ||
markdown-it-py==3.0.0 | ||
MarkupSafe==2.1.5 | ||
matplotlib-inline==0.1.7 | ||
mdurl==0.1.2 | ||
mpmath==1.3.0 | ||
networkx==3.3 | ||
numpy==1.26.4 | ||
nvidia-cublas-cu12==12.1.3.1 | ||
nvidia-cuda-cupti-cu12==12.1.105 | ||
nvidia-cuda-nvrtc-cu12==12.1.105 | ||
nvidia-cuda-runtime-cu12==12.1.105 | ||
nvidia-cudnn-cu12==8.9.2.26 | ||
nvidia-cufft-cu12==11.0.2.54 | ||
nvidia-curand-cu12==10.3.2.106 | ||
nvidia-cusolver-cu12==11.4.5.107 | ||
nvidia-cusparse-cu12==12.1.0.106 | ||
nvidia-nccl-cu12==2.20.5 | ||
nvidia-nvjitlink-cu12==12.4.127 | ||
nvidia-nvtx-cu12==12.1.105 | ||
orjson==3.10.3 | ||
packaging==24.0 | ||
parso==0.8.4 | ||
pexpect==4.9.0 | ||
prompt-toolkit==3.0.43 | ||
psutil==5.9.8 | ||
ptyprocess==0.7.0 | ||
pure-eval==0.2.2 | ||
pydantic==2.7.1 | ||
pydantic_core==2.18.2 | ||
Pygments==2.18.0 | ||
python-dotenv==1.0.1 | ||
python-multipart==0.0.9 | ||
PyYAML==6.0.1 | ||
regex==2024.4.28 | ||
requests==2.31.0 | ||
rich==13.7.1 | ||
safetensors==0.4.3 | ||
shellingham==1.5.4 | ||
six==1.16.0 | ||
sniffio==1.3.1 | ||
stack-data==0.6.3 | ||
starlette==0.37.2 | ||
sympy==1.12 | ||
tokenizers==0.19.1 | ||
torch==2.3.0 | ||
tqdm==4.66.4 | ||
traitlets==5.14.3 | ||
transformers==4.40.1 | ||
triton==2.3.0 | ||
typer==0.12.3 | ||
typing_extensions==4.11.0 | ||
ujson==5.9.0 | ||
urllib3==2.2.1 | ||
uvicorn==0.29.0 | ||
uvloop==0.19.0 | ||
watchfiles==0.21.0 | ||
wcwidth==0.2.13 | ||
websockets==12.0 |