chore: Part 5 - documentation updates (#218)
Update parameters across files to ensure the intended inference behavior and
follow best practices

---------

Signed-off-by: Ishaan Sehgal <[email protected]>
ishaansehgal99 committed Feb 5, 2024
1 parent 2255416 commit 86d0f46
Showing 4 changed files with 36 additions and 26 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/kind-cluster/main.py
@@ -6,8 +6,6 @@
 import time
 from pathlib import Path
 
-import yaml
-
 KAITO_REPO_URL = "https://github.com/Azure/kaito.git"
 
 def get_weights_path(model_name):
54 changes: 32 additions & 22 deletions presets/models/falcon/README.md
@@ -25,34 +25,44 @@ curl -X POST \
 -H "Content-Type: application/json" \
 -d '{
 "prompt":"YOUR_PROMPT_HERE",
-"max_length":200,
-"min_length":0,
-"do_sample":true,
-"early_stopping":false,
-"num_beams":1,
-"num_beam_groups":1,
-"diversity_penalty":0.0,
-"temperature":1.0,
-"top_k":10,
-"top_p":1,
-"typical_p":1,
-"repetition_penalty":1,
-"length_penalty":1,
-"no_repeat_ngram_size":0,
-"encoder_no_repeat_ngram_size":0,
-"bad_words_ids":null,
-"num_return_sequences":1,
-"output_scores":false,
-"return_dict_in_generate":false,
-"forced_bos_token_id":null,
-"forced_eos_token_id":null,
-"remove_invalid_values":null
+"return_full_text": false,
+"clean_up_tokenization_spaces": false,
+"prefix": null,
+"handle_long_generation": null,
+"generate_kwargs": {
+    "max_length":200,
+    "min_length":0,
+    "do_sample":true,
+    "early_stopping":false,
+    "num_beams":1,
+    "num_beam_groups":1,
+    "diversity_penalty":0.0,
+    "temperature":1.0,
+    "top_k":10,
+    "top_p":1,
+    "typical_p":1,
+    "repetition_penalty":1,
+    "length_penalty":1,
+    "no_repeat_ngram_size":0,
+    "encoder_no_repeat_ngram_size":0,
+    "bad_words_ids":null,
+    "num_return_sequences":1,
+    "output_scores":false,
+    "return_dict_in_generate":false,
+    "forced_bos_token_id":null,
+    "forced_eos_token_id":null,
+    "remove_invalid_values":null
+    }
 }' \
 "http://<SERVICE>:80/chat"
 ```
 
 ### Parameters
 - `prompt`: The initial text provided by the user, from which the model will continue generating text.
+- `return_full_text`: If false, only the generated text is returned; otherwise the prompt and the generated text are returned together.
+- `clean_up_tokenization_spaces`: Whether to remove potential extra spaces from the text output.
+- `prefix`: A prefix added to the prompt.
+- `handle_long_generation`: Strategy for handling generations that exceed the model's maximum length capacity.
 - `max_length`: The maximum total number of tokens in the generated text.
 - `min_length`: The minimum total number of tokens that should be generated.
 - `do_sample`: If True, sampling methods will be used for text generation, which can introduce randomness and variation.
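The new request body above can be mirrored in Python. The sketch below only builds and prints the payload, assuming the same placeholder prompt and showing a subset of the `generate_kwargs` fields from the README; the helper name `build_chat_payload` is illustrative, not part of the repository.

```python
import json

def build_chat_payload(prompt: str) -> dict:
    """Build the /chat request body as restructured in this commit:
    pipeline-level options at the top level, model generation options
    nested under generate_kwargs."""
    return {
        "prompt": prompt,
        "return_full_text": False,
        "clean_up_tokenization_spaces": False,
        "generate_kwargs": {
            "max_length": 200,
            "do_sample": True,
            "temperature": 1.0,
            "top_k": 10,
            "top_p": 1,
        },
    }

payload = build_chat_payload("YOUR_PROMPT_HERE")
print(json.dumps(payload, indent=2))
```

Sending it would then be a single call such as `requests.post("http://<SERVICE>:80/chat", json=payload)`; the `requests` library and the service address are assumptions here, matching the curl example's placeholder.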
3 changes: 2 additions & 1 deletion presets/models/llama2/README.md
@@ -31,7 +31,8 @@ Use the following command to build the llama2 inference service image from the r
 docker build \
 --file docker/presets/llama-2/Dockerfile \
 --build-arg WEIGHTS_PATH=$LLAMA_WEIGHTS_PATH \
---build-arg MODEL_PRESET_PATH=presets/models/llama2 \
+--build-arg MODEL_TYPE=llama2-completion \
+--build-arg VERSION=0.0.1 \
 -t $LLAMA_MODEL_NAME:latest .
 ```
 
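The updated build arguments can be checked without invoking Docker by composing the command programmatically. A minimal sketch, assuming a hypothetical helper name and placeholder weights path; the argument values mirror the updated README command:

```python
def llama2_build_command(weights_path: str, image_name: str) -> list:
    """Compose the docker build command with the build args this
    commit introduces (MODEL_TYPE and VERSION replace the old
    MODEL_PRESET_PATH argument)."""
    return [
        "docker", "build",
        "--file", "docker/presets/llama-2/Dockerfile",
        "--build-arg", f"WEIGHTS_PATH={weights_path}",
        "--build-arg", "MODEL_TYPE=llama2-completion",
        "--build-arg", "VERSION=0.0.1",
        "-t", f"{image_name}:latest",
        ".",
    ]

# Print the command for review instead of executing it.
print(" ".join(llama2_build_command("/data/llama-2/weights", "llama2")))
```

Actually running it would be `subprocess.run(llama2_build_command(...), check=True)`; both the wrapper and the example paths are illustrative, not mandated by the repository.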
3 changes: 2 additions & 1 deletion presets/models/llama2chat/README.md
@@ -31,7 +31,8 @@ Use the following command to build the llama2chat inference service image from t
 docker build \
 --file docker/presets/llama-2/Dockerfile \
 --build-arg WEIGHTS_PATH=$LLAMA_WEIGHTS_PATH \
---build-arg MODEL_PRESET_PATH=presets/models/llama2chat \
+--build-arg MODEL_TYPE=llama2-chat \
+--build-arg VERSION=0.0.1 \
 -t $LLAMA_MODEL_NAME:latest .
 ```
 
