From 3d0564e589ee228e0412467d6eefeaf5baf16a16 Mon Sep 17 00:00:00 2001
From: Chenxi
Date: Sun, 1 Oct 2023 19:53:09 +0100
Subject: [PATCH] replicate

---
 README.md  |  1 +
 cog.yaml   | 23 +++++++++++++++++++++++
 predict.py | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)
 create mode 100644 cog.yaml
 create mode 100644 predict.py

diff --git a/README.md b/README.md
index f69338c..24f4768 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Please refer to [Technical Report](https://arxiv.org/pdf/2309.15112.pdf) for mor
 
 ## Demo
 
+[![Replicate](https://replicate.com/cjwbw/internlm-xcomposer/badge)](https://replicate.com/cjwbw/internlm-xcomposer)
 
 https://github.com/InternLM/InternLM-XComposer/assets/22662425/fdb89a38-c650-45f2-b5b7-51182e89a5cc
 
diff --git a/cog.yaml b/cog.yaml
new file mode 100644
index 0000000..7a9ad3a
--- /dev/null
+++ b/cog.yaml
@@ -0,0 +1,23 @@
+# Configuration for Cog ⚙️
+# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
+
+build:
+  gpu: true
+  python_version: "3.9"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+    - "ninja-build"
+  python_packages:
+    - "xlsxwriter==3.1.2"
+    - "sentencepiece==0.1.99"
+    - "transformers==4.33.3"
+    - "torch==2.0.1"
+    - "pillow==10.0.1"
+    - "torchvision==0.15.2"
+    - ipython
+    - "timm==0.4.12"
+    - "einops==0.6.1"
+  run:
+    - git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd csrc/rotary && pip install -e .
+predict: "predict.py:Predictor"
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..f7f7376
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,35 @@
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+
+
+import torch
+from transformers import AutoModel, AutoTokenizer
+from cog import BasePredictor, Input, Path
+
+
+class Predictor(BasePredictor):
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        torch.set_grad_enabled(False)
+        self.model = (
+            AutoModel.from_pretrained(
+                "internlm/internlm-xcomposer-7b",
+                cache_dir="model_cache",
+                trust_remote_code=True,
+            )
+            .cuda()
+            .eval()
+        )
+        tokenizer = AutoTokenizer.from_pretrained(
+            "internlm/internlm-xcomposer-7b", trust_remote_code=True
+        )
+        self.model.tokenizer = tokenizer
+
+    def predict(
+        self,
+        image: Path = Input(description="Input image.", default=None),
+        text: str = Input(description="Input text."),
+    ) -> str:
+        """Run a single prediction on the model"""
+        output = self.model.generate(text, str(image) if image else None)
+        return output
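
With the patch applied, the predictor can be exercised locally before pushing to Replicate. A minimal sketch of a Cog CLI invocation, assuming a local test image named demo.jpg (hypothetical filename):

    # Build the container and run one prediction against the Predictor above.
    # demo.jpg is a placeholder for any local test image.
    cog predict -i image=@demo.jpg -i text="Describe this image."

Omitting the image input is also valid here, since predict.py declares image with default=None and passes None through to model.generate for text-only prompts.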