Switch to llama-3-8b model
polyrabbit committed May 11, 2024
1 parent d3efb11 commit a8ca8c6
Showing 6 changed files with 46 additions and 40 deletions.
2 changes: 1 addition & 1 deletion db/summary.py
@@ -29,7 +29,7 @@ def local_llm(self):
         return self in (Model.LLAMA, Model.TRANSFORMER)
 
     def is_finally(self) -> bool: # already best, no need to try other models
-        return self in (Model.EMBED, Model.OPENAI, Model.GEMMA)
+        return self in (Model.EMBED, Model.OPENAI, Model.GEMMA, Model.LLAMA)
 
     def need_escape(self):
         return self in (Model.OPENAI,)
7 changes: 7 additions & 0 deletions db/translation.py
@@ -36,6 +36,13 @@ def get(text, to_lang):
     return text
 
 
+def exists(text, to_lang) -> bool:
+    text = text[:Translation.source.type.length]
+    stmt = select(Translation).where(Translation.source == text, Translation.language == to_lang)
+    with session_scope(defer_commit=True) as session:
+        return session.scalars(stmt).first()
+
+
 def add(source, target, lang):
     if not (source and target):
         return
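For context, the new exists() helper is the standard SQLAlchemy 2.0 select/scalars idiom. Note that it is annotated -> bool but actually returns the first matching row or None; the caller in news.py only uses it in a truthy test, so this works. Below is a minimal, self-contained sketch of the same pattern with a hypothetical Translation model and an in-memory SQLite engine (the real project defines its own model, column sizes, and session_scope):

from sqlalchemy import String, create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Translation(Base):
    # Hypothetical shape; column names follow the fields referenced in the diff.
    __tablename__ = 'translation'
    id: Mapped[int] = mapped_column(primary_key=True)
    source: Mapped[str] = mapped_column(String(1024))
    target: Mapped[str] = mapped_column(String(1024))
    language: Mapped[str] = mapped_column(String(16))

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Translation(source='Hello', target='你好', language='zh'))
    session.commit()
    # Same idiom as exists(): .first() yields the row or None (truthy/falsy, not a strict bool)
    stmt = select(Translation).where(Translation.source == 'Hello', Translation.language == 'zh')
    print(bool(session.scalars(stmt).first()))  # True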
38 changes: 15 additions & 23 deletions hacker_news/llm/openai.py
@@ -51,7 +51,7 @@ def sanitize_title(title):
     return title.replace('"', "'").replace('\n', ' ').strip()
 
 
-def summarize_by_openai_family(content: str, need_json: bool) -> str:
+def call_openai_family(content: str, sys_prompt: str) -> str:
     start_time = time.time()
 
     # 200: function + prompt tokens (to reduce hitting rate limit)
@@ -75,26 +75,9 @@ def summarize_by_openai_family(content: str, need_json: bool) -> str:
               "frequency_penalty": 1, # Avoid token repetition
               "presence_penalty": 1,
               'timeout': 30}
-    if need_json:
-        kwargs['functions'] = [{"name": "render", "parameters": {
-            "type": "object",
-            "properties": {
-                "summary": {
-                    "type": "string",
-                    "description": "English summary"
-                },
-                "summary_zh": {
-                    "type": "string",
-                    "description": "Chinese summary"
-                },
-                "translation": {
-                    "type": "string",
-                    "description": "Chinese translation of sentence"
-                },
-            },
-            # "required": ["summary"] # ChatGPT only returns the required field?
-        }}]
-        kwargs['function_call'] = {"name": "render"}
+    if model_family() == Model.GEMMA:
+        # Gemma outputs weird words like Kün/viciss/▁purcha/▁xPos/▁Gorb
+        kwargs['logit_bias'] = {200507: -100, 225856: -100, 6204: -100, 232014: -100, 172406: -100}
 
     if config.openai_model.startswith('text-'):
         prompt = (f'Use third person mood to summarize the following article delimited by triple backticks in 2 concise English sentences. Ensure the summary does not exceed 100 characters.\n'
@@ -109,8 +92,7 @@ def summarize_by_openai_family(content: str, need_json: bool) -> str:
         messages=[
             {
                 "role": "system",
-                "content": "You are a helpful summarizer. Please think step by step and use third person mood to summarize all user's input in 2 short English sentences. "
-                           "Ensure the summary does not exceed 100 characters. Provide response in plain text format without any Markdown formatting."
+                "content": sys_prompt
             },
             {'role': 'user', 'content': content},
         ],
@@ -143,4 +125,14 @@ def summarize_by_openai_family(content: str, need_json: bool) -> str:
     answer = re.sub(r'^[^a-zA-Z0-9]+', '', answer)
     # Always have bold **?
     answer = answer.replace('**', ' ')
+    answer = re.sub(r'^summary:?', '', answer, flags=re.IGNORECASE)
     return answer.strip()
+
+
+def summarize_by_openai_family(content: str) -> str:
+    return call_openai_family(content, "You are a helpful summarizer. Please think step by step and use third person mood to summarize all user's input in 2 short English sentences. "
+                                       "Ensure the summary does not exceed 100 characters. Provide response in plain text format without any Markdown formatting.")
+
+
+def translate_by_openai_family(content: str, lang: str) -> str:
+    return call_openai_family(content, f"You are a helpful translator. Translate user's input into {lang}.")
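The refactor above turns the old summarizer into a generic call_openai_family(content, sys_prompt) and layers two thin wrappers on top, so summarization and translation share one request and post-processing path and differ only in the system prompt. A rough sketch of that shape with a stand-in completion function (no real API call; names are illustrative, not the project's):

def fake_complete(messages: list[dict]) -> str:
    # Stand-in for the OpenAI chat-completion request made by call_openai_family()
    return f"({messages[0]['content'][:30]}...) -> ({messages[1]['content'][:30]}...)"

def call_llm(content: str, sys_prompt: str) -> str:
    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": content},
    ]
    return fake_complete(messages).strip()

def summarize(content: str) -> str:
    return call_llm(content, "Summarize the input in 2 short English sentences.")

def translate(content: str, lang: str) -> str:
    return call_llm(content, f"Translate the input into {lang}.")

print(summarize("Telnet is a protocol from 1969."))
print(translate("Telnet is a protocol from 1969.", "simplified Chinese"))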
33 changes: 20 additions & 13 deletions hacker_news/news.py
@@ -3,7 +3,6 @@
 import os
 import re
 import time
-from json import JSONDecodeError
 
 import openai
 from slugify import slugify
@@ -12,7 +11,7 @@
 import db.summary
 from db.summary import Model
 from hacker_news.llm.coze import summarize_by_coze
-from hacker_news.llm.openai import summarize_by_openai_family, model_family
+from hacker_news.llm.openai import summarize_by_openai_family, model_family, translate_by_openai_family
 from page_content_extractor import parser_factory
 from page_content_extractor.webimage import WebImage
 
@@ -136,21 +135,29 @@ def summarize_by_openai(self, content):
             return ''
 
         try:
-            # Too many exceptions to support translation, give up...
-            # answer = self.openai_complete(prompt, True)
-            # summary = self.parse_step_answer(answer).strip().strip(' *-')
-            # if not summary: # If step parse failed, ignore the translation
-            return summarize_by_openai_family(content, False)
+            sum = summarize_by_openai_family(content)
+            self.translate_summary(sum)
+            return sum
         except Exception as e:
             logger.exception(f'Failed to summarize using openai, key #{config.openai_key_index}, {e}') # Make this error explicit in the log
             return ''
 
-    def parse_step_answer(self, answer):
-        if not answer or isinstance(answer, str):
-            return answer
-        db.translation.add(answer.get('summary', ''), answer.get('summary_zh', ''), 'zh')
-        db.translation.add(self.title, self.parse_title_translation(answer.get('translation', '')), 'zh')
-        return answer.get('summary', '')
+    def translate_summary(self, summary: str):
+        if not summary:
+            return
+        try:
+            if db.translation.exists(summary, 'zh'):
+                return
+            trans = translate_by_openai_family(summary, 'simplified Chinese')
+            for char in trans:
+                if '\u4e00' <= char <= '\u9fff':
+                    break
+            else:
+                logger.info(f'No Chinese chars in translation: {trans}')
+                return
+            db.translation.add(summary, trans, 'zh')
+        except Exception as e:
+            logger.exception(f'Failed to translate summary using openai, key #{config.openai_key_index}, {e}')
 
     def parse_title_translation(self, title):
         # Somehow, openai always return the original title
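The new translate_summary() validates the model output by scanning for at least one character in the CJK Unified Ideographs block (U+4E00 to U+9FFF); the for/else only reaches the else branch when the loop never breaks, i.e. no Chinese character was found. A standalone sketch of the same check (helper name is illustrative):

def has_cjk(text: str) -> bool:
    # True if text contains at least one CJK Unified Ideograph (U+4E00 to U+9FFF)
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)

assert has_cjk('Telnet 是一个古老的协议')
assert not has_cjk('Telnet is an old protocol')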
2 changes: 1 addition & 1 deletion probe.py
@@ -19,7 +19,7 @@ def probe_hn_summary():
     body = resp.text
 
     assert "Hacker News" in body, '"Hacker News" not in response'
-    llm_summaries = body.count("OpenAI") + body.count("Gemma")
+    llm_summaries = body.count("OpenAI") + body.count("Gemma") + body.count("Llama")
     assert llm_summaries > 5, "Too few OpenAI summaries, only got %d" % llm_summaries
     logger.info(f'OpenAI summaries {llm_summaries} times')
 
4 changes: 2 additions & 2 deletions test/test_news_summary.py
@@ -64,8 +64,8 @@ def test_summarize_by_openai_family(self):
         fpath = os.path.join(os.path.dirname(__file__), 'fixtures/telnet.txt')
         with open(fpath, 'r') as fp:
             content = fp.read()
-        summary = summarize_by_openai_family(content, False)
-        self.assertIn('Telnet', summary)
+        summary = summarize_by_openai_family(content)
+        self.assertIn('elnet', summary)
         self.assertFalse(summary.startswith(': '))
         self.assertGreater(len(summary), 80)
         self.assertLess(len(summary), config.summary_size * 2)
