From 10898c53df24f9222f03a8c75d535d0a0592abac Mon Sep 17 00:00:00 2001
From: Nils
Date: Thu, 14 Dec 2023 11:16:13 +0100
Subject: [PATCH] gemini

---
 README.md        | 17 +++++++++++++----
 requirements.txt | 14 +++++++-------
 vertex.py        |  5 +++++
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 1535b3b..876e3e6 100644
--- a/README.md
+++ b/README.md
@@ -4,18 +4,18 @@
 [![Bagde: OpenAI](https://img.shields.io/badge/OpenAI-%23412991.svg?logo=openai&logoColor=white)](#readme)
 [![Bagde: Python](https://img.shields.io/badge/Python-3670A0?logo=python&logoColor=ffdd54)](#readme)
 
-This project is a drop-in replacement REST API for Vertex AI that is compatible with the OpenAI API specifications.
+This project is a drop-in replacement REST API for Vertex AI (**PaLM 2, Codey, Gemini**) that is compatible with the OpenAI API specifications.
 
 Examples:
 
-| Chat with Bard in Chatbot UI | Get help from Bard in VSCode |
+| Chat with Gemini in Chatbot UI | Get help from Gemini in VSCode |
 |-----------------------------------------------------------|---------------------------------------------------|
 | ![Screenshot: Chatbot UI chat](./img/chatbot-ui-chat.png) | ![Screenshot: VSCode chat](./img/vscode-chat.png) |
 
 This project is inspired by the idea of [LocalAI](https://github.com/go-skynet/LocalAI)
 but with the focus on making [Google Cloud Platform Vertex AI PaLM](https://ai.google/) more accessible to anyone.
 
-A Google Cloud Run service is installed that translates the OpenAI API calls to Vertex AI (PaLM).
+A Google Cloud Run service is installed that translates the OpenAI API calls to Vertex AI (PaLM 2, Codey, Gemini).
@@ -127,7 +127,16 @@
 export OPENAI_API_KEY="sk-XYZ"
 uvicorn vertex:app --reload
 ```
-Or run with `codechat-bison-32k` 32k model:
+Run with the Gemini `gemini-pro` model:
+
+```bash
+export DEBUG="True"
+export OPENAI_API_KEY="sk-XYZ"
+export MODEL_NAME="gemini-pro"
+uvicorn vertex:app --reload
+```
+
+Run with the Codey `codechat-bison-32k` model:
 
 ```bash
 export DEBUG="True"
diff --git a/requirements.txt b/requirements.txt
index 6cc5654..f0af7db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-fastapi==0.103.0
-uvicorn==0.23.2
-pydantic==1.10.12
-sse-starlette==1.6.5
-langchain==0.0.329
-transformers==4.32.1
-google-cloud-aiplatform==1.31.1
\ No newline at end of file
+fastapi==0.105.0
+uvicorn==0.24.0
+pydantic==1.10.13
+sse-starlette==1.8.2
+langchain==0.0.350
+transformers==4.36.1
+google-cloud-aiplatform==1.38.1
\ No newline at end of file
diff --git a/vertex.py b/vertex.py
index 8d8b949..4c595d9 100644
--- a/vertex.py
+++ b/vertex.py
@@ -281,6 +281,8 @@ async def chat_completions(body: ChatBody, request: Request):
     top_p = float(body.top_p or default_top_p)
     max_output_tokens = int(body.max_tokens or default_max_output_tokens)
     # Note: Max output token:
+    # - gemini-pro: 8192
+    #   https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
     # - chat-bison: 1024
     # - codechat-bison: 2048
     # - ..-32k: The total amount of input and output tokens adds up to 32k.
@@ -289,6 +291,9 @@
     if model_name == 'codechat-bison':
         if max_output_tokens > 2048:
             max_output_tokens = 2048
+    elif model_name.find("gemini-pro") != -1:
+        if max_output_tokens > 8192:
+            max_output_tokens = 8192
     elif model_name.find("32k"):
         if max_output_tokens > 16000:
             max_output_tokens = 16000
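A note on the new `gemini-pro` branch above: `str.find()` returns `0` (falsy) when the substring sits at index 0 and `-1` (truthy) when it is absent, so a bare `if model_name.find("gemini-pro"):` would skip the cap for the plain `gemini-pro` model and fire for every model name that does *not* contain the substring. The added line therefore compares against `-1` explicitly. A minimal repro of the pitfall:

```python
# str.find() truthiness pitfall behind the explicit "!= -1" in the hunk above.
print("gemini-pro".find("gemini-pro"))  # 0  -> falsy: the cap branch would be skipped
print("chat-bison".find("gemini-pro"))  # -1 -> truthy: the wrong branch would be taken

# Correct substring tests:
assert "gemini-pro".find("gemini-pro") != -1  # explicit comparison, as in the patch
assert "gemini-pro" in "gemini-pro"           # the more idiomatic spelling
```

(The pre-existing `elif model_name.find("32k"):` context line has the same latent issue, but it is untouched by this patch.)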
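As more models gain their own output ceilings, the growing `elif` chain could be collapsed into a lookup table. The sketch below is not part of the patch: `MAX_OUTPUT_TOKENS` and `cap_output_tokens` are hypothetical names, with the ceilings taken from the comment block in the first vertex.py hunk.

```python
# Hypothetical table-driven alternative to the elif chain in chat_completions().
# Order matters: more specific markers must precede their substrings
# ("codechat-bison" contains "chat-bison"; the 32k variants contain both).
MAX_OUTPUT_TOKENS = {
    "32k": 16000,
    "gemini-pro": 8192,
    "codechat-bison": 2048,
    "chat-bison": 1024,
}

def cap_output_tokens(model_name: str, requested: int) -> int:
    """Clamp the requested output tokens to the first matching model ceiling."""
    for marker, ceiling in MAX_OUTPUT_TOKENS.items():
        if marker in model_name:
            return min(requested, ceiling)
    return requested

assert cap_output_tokens("gemini-pro", 32000) == 8192
assert cap_output_tokens("codechat-bison-32k", 32000) == 16000
assert cap_output_tokens("chat-bison", 4096) == 1024
```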
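To exercise the new `gemini-pro` path end to end, the server started per the README snippet can be hit with a plain chat-completions request. A minimal sketch, assuming uvicorn's default `127.0.0.1:8000`, the `sk-XYZ` key from the README examples, and the standard OpenAI `/v1/chat/completions` route:

```python
# Smoke test against a local `MODEL_NAME="gemini-pro" uvicorn vertex:app --reload`.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",  # uvicorn's default bind address
    headers={"Authorization": "Bearer sk-XYZ"},   # must match the exported OPENAI_API_KEY
    json={
        "model": "gemini-pro",
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 9000,  # above the 8192 ceiling, so the server should clamp it
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```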