From 10898c53df24f9222f03a8c75d535d0a0592abac Mon Sep 17 00:00:00 2001
From: Nils
Date: Thu, 14 Dec 2023 11:16:13 +0100
Subject: [PATCH] gemini

---
 README.md        | 17 +++++++++++++----
 requirements.txt | 14 +++++++-------
 vertex.py        |  5 +++++
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 1535b3b..876e3e6 100644
--- a/README.md
+++ b/README.md
@@ -4,18 +4,18 @@
 [![Bagde: OpenAI](https://img.shields.io/badge/OpenAI-%23412991.svg?logo=openai&logoColor=white)](#readme)
 [![Bagde: Python](https://img.shields.io/badge/Python-3670A0?logo=python&logoColor=ffdd54)](#readme)
 
-This project is a drop-in replacement REST API for Vertex AI that is compatible with the OpenAI API specifications.
+This project is a drop-in replacement REST API for Vertex AI (**PaLM 2, Codey, Gemini**) that is compatible with the OpenAI API specifications.
 
 Examples:
 
-| Chat with Bard in Chatbot UI | Get help from Bard in VSCode |
+| Chat with Gemini in Chatbot UI | Get help from Gemini in VSCode |
 |-----------------------------------------------------------|---------------------------------------------------|
 | ![Screenshot: Chatbot UI chat](./img/chatbot-ui-chat.png) | ![Screenshot: VSCode chat](./img/vscode-chat.png) |
 
 This project is inspired by the idea of [LocalAI](https://github.com/go-skynet/LocalAI)
 but with the focus on making [Google Cloud Platform Vertex AI PaLM](https://ai.google/) more accessible to anyone.
 
-A Google Cloud Run service is installed that translates the OpenAI API calls to Vertex AI (PaLM).
+A Google Cloud Run service is installed that translates the OpenAI API calls to Vertex AI (PaLM 2, Codey, Gemini).
@@ -127,7 +127,16 @@
 export OPENAI_API_KEY="sk-XYZ"
 uvicorn vertex:app --reload
 ```
-Or run with `codechat-bison-32k` 32k model:
+Run with the Gemini `gemini-pro` model:
+
+```bash
+export DEBUG="True"
+export OPENAI_API_KEY="sk-XYZ"
+export MODEL_NAME="gemini-pro"
+uvicorn vertex:app --reload
+```
+
+Run with the Codey `codechat-bison-32k` model:
 
 ```bash
 export DEBUG="True"
diff --git a/requirements.txt b/requirements.txt
index 6cc5654..f0af7db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-fastapi==0.103.0
-uvicorn==0.23.2
-pydantic==1.10.12
-sse-starlette==1.6.5
-langchain==0.0.329
-transformers==4.32.1
-google-cloud-aiplatform==1.31.1
\ No newline at end of file
+fastapi==0.105.0
+uvicorn==0.24.0
+pydantic==1.10.13
+sse-starlette==1.8.2
+langchain==0.0.350
+transformers==4.36.1
+google-cloud-aiplatform==1.38.1
\ No newline at end of file
diff --git a/vertex.py b/vertex.py
index 8d8b949..4c595d9 100644
--- a/vertex.py
+++ b/vertex.py
@@ -281,6 +281,8 @@ async def chat_completions(body: ChatBody, request: Request):
     top_p = float(body.top_p or default_top_p)
     max_output_tokens = int(body.max_tokens or default_max_output_tokens)
     # Note: Max output token:
+    # - gemini-pro: 8192
+    #   https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
     # - chat-bison: 1024
     # - codechat-bison: 2048
     # - ..-32k: The total amount of input and output tokens adds up to 32k.
@@ -289,6 +291,9 @@
     if model_name == 'codechat-bison':
         if max_output_tokens > 2048:
             max_output_tokens = 2048
+    elif model_name.find("gemini-pro") != -1:
+        if max_output_tokens > 8192:
+            max_output_tokens = 8192
     elif model_name.find("32k"):
         if max_output_tokens > 16000:
             max_output_tokens = 16000
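A note on the new `gemini-pro` branch above: `str.find()` returns `0` (falsy) when the substring sits at index 0 and `-1` (truthy) when it is absent, so a bare `if model_name.find("gemini-pro"):` would skip the cap for the plain `gemini-pro` model and fire for every model name that does *not* contain the substring. The added line therefore compares against `-1` explicitly. A minimal repro of the pitfall:

```python
# str.find() truthiness pitfall behind the explicit "!= -1" in the hunk above.
print("gemini-pro".find("gemini-pro"))  # 0  -> falsy: the cap branch would be skipped
print("chat-bison".find("gemini-pro"))  # -1 -> truthy: the wrong branch would be taken

# Correct substring tests:
assert "gemini-pro".find("gemini-pro") != -1  # explicit comparison, as in the patch
assert "gemini-pro" in "gemini-pro"           # the more idiomatic spelling
```

(The pre-existing `elif model_name.find("32k"):` context line has the same latent issue, but it is untouched by this patch.)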
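As more models gain their own output ceilings, the growing `elif` chain could be collapsed into a lookup table. The sketch below is not part of the patch: `MAX_OUTPUT_TOKENS` and `cap_output_tokens` are hypothetical names, with the ceilings taken from the comment block in the first vertex.py hunk.

```python
# Hypothetical table-driven alternative to the elif chain in chat_completions().
# Order matters: more specific markers must precede their substrings
# ("codechat-bison" contains "chat-bison"; the 32k variants contain both).
MAX_OUTPUT_TOKENS = {
    "32k": 16000,
    "gemini-pro": 8192,
    "codechat-bison": 2048,
    "chat-bison": 1024,
}

def cap_output_tokens(model_name: str, requested: int) -> int:
    """Clamp the requested output tokens to the first matching model ceiling."""
    for marker, ceiling in MAX_OUTPUT_TOKENS.items():
        if marker in model_name:
            return min(requested, ceiling)
    return requested

assert cap_output_tokens("gemini-pro", 32000) == 8192
assert cap_output_tokens("codechat-bison-32k", 32000) == 16000
assert cap_output_tokens("chat-bison", 4096) == 1024
```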
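To exercise the new `gemini-pro` path end to end, the server started per the README snippet can be hit with a plain chat-completions request. A minimal sketch, assuming uvicorn's default `127.0.0.1:8000`, the `sk-XYZ` key from the README examples, and the standard OpenAI `/v1/chat/completions` route:

```python
# Smoke test against a local `MODEL_NAME="gemini-pro" uvicorn vertex:app --reload`.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",  # uvicorn's default bind address
    headers={"Authorization": "Bearer sk-XYZ"},   # must match the exported OPENAI_API_KEY
    json={
        "model": "gemini-pro",
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 9000,  # above the 8192 ceiling, so the server should clamp it
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```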