From 3d0564e589ee228e0412467d6eefeaf5baf16a16 Mon Sep 17 00:00:00 2001
From: Chenxi
Date: Sun, 1 Oct 2023 19:53:09 +0100
Subject: [PATCH] replicate

---
 README.md  |  1 +
 cog.yaml   | 23 +++++++++++++++++++++++
 predict.py | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)
 create mode 100644 cog.yaml
 create mode 100644 predict.py

diff --git a/README.md b/README.md
index f69338c..24f4768 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Please refer to [Technical Report](https://arxiv.org/pdf/2309.15112.pdf) for mor
 
 ## Demo
 
+[![Replicate](https://replicate.com/cjwbw/internlm-xcomposer/badge)](https://replicate.com/cjwbw/internlm-xcomposer)
 
 https://github.com/InternLM/InternLM-XComposer/assets/22662425/fdb89a38-c650-45f2-b5b7-51182e89a5cc
 
diff --git a/cog.yaml b/cog.yaml
new file mode 100644
index 0000000..7a9ad3a
--- /dev/null
+++ b/cog.yaml
@@ -0,0 +1,23 @@
+# Configuration for Cog ⚙️
+# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
+
+build:
+  gpu: true
+  python_version: "3.9"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+    - "ninja-build"
+  python_packages:
+    - "xlsxwriter==3.1.2"
+    - "sentencepiece==0.1.99"
+    - "transformers==4.33.3"
+    - "torch==2.0.1"
+    - "pillow==10.0.1"
+    - "torchvision==0.15.2"
+    - ipython
+    - "timm==0.4.12"
+    - "einops==0.6.1"
+  run:
+    - git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd csrc/rotary && pip install -e .
+predict: "predict.py:Predictor"
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..f7f7376
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,35 @@
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+
+
+import torch
+from transformers import AutoModel, AutoTokenizer
+from cog import BasePredictor, Input, Path
+
+
+class Predictor(BasePredictor):
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        torch.set_grad_enabled(False)
+        self.model = (
+            AutoModel.from_pretrained(
+                "internlm/internlm-xcomposer-7b",
+                cache_dir="model_cache",
+                trust_remote_code=True,
+            )
+            .cuda()
+            .eval()
+        )
+        tokenizer = AutoTokenizer.from_pretrained(
+            "internlm/internlm-xcomposer-7b", trust_remote_code=True
+        )
+        self.model.tokenizer = tokenizer
+
+    def predict(
+        self,
+        image: Path = Input(description="Input image.", default=None),
+        text: str = Input(description="Input text."),
+    ) -> str:
+        """Run a single prediction on the model"""
+        output = self.model.generate(text, str(image) if image else None)
+        return output
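
With the patch applied, the predictor can be exercised locally before pushing to Replicate. A minimal sketch of a Cog CLI invocation, assuming a local test image named demo.jpg (hypothetical filename):

    # Build the container and run one prediction against the Predictor above.
    # demo.jpg is a placeholder for any local test image.
    cog predict -i image=@demo.jpg -i text="Describe this image."

Omitting the image input is also valid here, since predict.py declares image with default=None and passes None through to model.generate for text-only prompts.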