diff --git a/hacker_news/llm/coze.py b/hacker_news/llm/coze.py index 8371126..7301d4d 100644 --- a/hacker_news/llm/coze.py +++ b/hacker_news/llm/coze.py @@ -58,7 +58,7 @@ def summarize_by_coze(content: str) -> str: content = sanitize_for_openai(content, overhead=1000) # For model: GPT-4 Turbo (128K), temperature: 0.5 - GPT-4 Turbo is an excellent rule follower. - prompt = (f'Use third person mood to summarize the main points of the following article delimited by triple backticks in 2 concise sentences. ' + prompt = (f'Use third person mood to summarize the main points of the following article delimited by triple backticks in 2 concise English sentences. ' f'Ensure the summary does not exceed 300 characters.\n' f'```{content}.```') @@ -75,20 +75,7 @@ def summarize_by_coze(content: str) -> str: 'stream': False, }) resp.raise_for_status() - - for line in resp.iter_lines(): - if line and line.startswith(b'data:'): - line = line[len(b'data:'):].strip() - try: - resp_json = json.loads(line) - except json.JSONDecodeError as e: - logger.warning(f'Failed to decode coze response, unexpected json {line}, error: {e}') - return '' - break - else: - logger.warning(f'Unexpected coze response, no data line found') - return '' - + resp_json = resp.json() except Exception as e: logger.warning(f'Failed to summarize using coze, {e}') return '' diff --git a/hacker_news/llm/openai.py b/hacker_news/llm/openai.py index e1715ca..eb38174 100644 --- a/hacker_news/llm/openai.py +++ b/hacker_news/llm/openai.py @@ -8,7 +8,10 @@ def sanitize_for_openai(text, overhead): # one token generally corresponds to ~4 characters, from https://platform.openai.com/tokenizer if len(text) > 4096 * 2: - enc = tiktoken.encoding_for_model(config.openai_model) + try: + enc = tiktoken.encoding_for_model(config.openai_model) # We have openai compatible apis now + except KeyError: + enc = tiktoken.encoding_for_model('gpt-3.5-turbo') tokens = enc.encode(text) if len(tokens) > 4096 - overhead: # 4096: model's context limit text = enc.decode(tokens[:4096 - overhead]) diff --git a/hacker_news/news.py b/hacker_news/news.py index 5703b0a..f0c028f 100644 --- a/hacker_news/news.py +++ b/hacker_news/news.py @@ -147,12 +147,13 @@ def summarize_by_openai(self, content): f'3 - Provide a Chinese translation of sentence: "{title}".\n' \ f'```{content.strip(".")}.```' try: - answer = self.openai_complete(prompt, True) - summary = self.parse_step_answer(answer).strip() - if not summary: # If step parse failed, ignore the translation - summary = self.openai_complete( - f'Summarize the article delimited by triple backticks in 2 sentences.\n' - f'```{content.strip(".")}.```', False) + # Too many exceptions to support translation, give up... + # answer = self.openai_complete(prompt, True) + # summary = self.parse_step_answer(answer).strip().strip(' *-') + # if not summary: # If step parse failed, ignore the translation + summary = self.openai_complete( + f'Use third person mood to summarize the main points of the following article delimited by triple backticks in 2 concise sentences. Ensure the summary does not exceed 100 characters.\n' + f'```{content.strip(".")}.```', False) return summary except Exception as e: logger.exception(f'Failed to summarize using openai, key #{config.openai_key_index}, {e}') # Make this error explicit in the log @@ -219,7 +220,7 @@ def openai_complete(self, prompt, need_json): return answer def parse_step_answer(self, answer): - if not answer: + if not answer or isinstance(answer, str): return answer db.translation.add(answer.get('summary', ''), answer.get('summary_zh', ''), 'zh') db.translation.add(self.title, self.parse_title_translation(answer.get('translation', '')), 'zh') diff --git a/page_content_extractor/http.py b/page_content_extractor/http.py index 40cadf2..f49255a 100644 --- a/page_content_extractor/http.py +++ b/page_content_extractor/http.py @@ -5,7 +5,6 @@ import urllib3 from fake_useragent import UserAgent from requests.adapters import HTTPAdapter -from urllib3.exceptions import InsecureRequestWarning from urllib3.util import timeout from urllib3.util.ssl_ import create_urllib3_context