Making TextIteratorStreamer in HuggingFace work with StreamingResponse in FastAPI #9407
-
First Check
Commit to Help
Example Code
import uvicorn
from fastapi import FastAPI, Depends, HTTPException, status, Request
from fastapi.responses import StreamingResponse
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import torch
# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "declare-lab/flan-alpaca-xl"

# Device string used when moving tokenized inputs: "cuda" when torch reports
# CUDA as available, otherwise "cpu".
if torch.cuda.is_available():
    torch_device = "cuda"
else:
    torch_device = "cpu"
print("Running on device:", torch_device)
print("CPU threads:", torch.get_num_threads())

# Load the seq2seq model and its tokenizer once, at import time.
# NOTE(review): load_in_8bit=True presumably needs a CUDA device and the
# bitsandbytes package -- confirm before running on a CPU-only host.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    device_map="auto",
    load_in_8bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
def run_generation(user_text):
    """Stream the model's answer to ``user_text`` chunk by chunk.

    The prompt is tokenized, ``model.generate`` is started on a background
    thread with a ``TextIteratorStreamer`` attached, and every piece of text
    the streamer hands back is yielded to the caller as soon as it arrives.

    Args:
        user_text: Prompt string to feed to the model.

    Yields:
        Each new piece of text taken from the streamer.

    Returns:
        The concatenation of all yielded pieces. Because this is a
        generator, that value is only reachable via ``StopIteration.value``.
    """
    encoded = tokenizer([user_text], return_tensors="pt").to(torch_device)

    # The streamer is given a timeout so that the consuming loop below does
    # not wait forever if the generation thread fails.
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Tokenized inputs first, then the generation settings on top of them.
    generation_args = dict(encoded)
    generation_args.update(
        streamer=streamer,
        max_new_tokens=250,
        do_sample=True,
        top_p=0.95,
        temperature=0.8,
        top_k=1,
    )

    # Generation runs on its own thread; this generator only consumes the
    # streamer, so the caller is never blocked on the whole generation.
    worker = Thread(target=model.generate, kwargs=generation_args)
    worker.start()

    pieces = []
    for piece in streamer:
        pieces.append(piece)
        yield piece
    return "".join(pieces)
app = FastAPI()


@app.get("/")
async def root():
    """Handle GET / by returning a fixed welcome message."""
    return dict(message="Welcome to FastAPI!")
@app.post("/generate")
async def generate(query: str):
    """Handle POST /generate by streaming generated text for ``query``.

    The generator returned by ``run_generation`` is passed straight to
    ``StreamingResponse`` with a ``text/plain`` media type, so chunks are
    sent as they are produced.
    """
    return StreamingResponse(run_generation(query), media_type="text/plain")
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8080, log_level="debug")

Description
There is a new feature in HuggingFace called TextIteratorStreamer.
I am trying to make this work with StreamingResponse in FastAPI.
Screen.Recording.2023-04-18.at.19.39.52.mov
Screen.Recording.2023-04-18.at.19.38.21.mov
Operating System
Linux
Operating System Details
No response
FastAPI Version
0.95.0
Python Version
3.9.16
Additional Context
Apologies in advance if this seems to be something obvious |
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 2 replies
-
|
Beta Was this translation helpful? Give feedback.
-
@maziyarpanahi How did you fix this issue? Because I didn't see the |
Beta Was this translation helpful? Give feedback.
-
I've run into the same problem. How can I fix it quickly? |
Beta Was this translation helpful? Give feedback.
yield new_text
actually does work, sorry for the question.