diff --git a/hacker_news/llm/openai.py b/hacker_news/llm/openai.py
index 296015d..6ed095d 100644
--- a/hacker_news/llm/openai.py
+++ b/hacker_news/llm/openai.py
@@ -117,7 +117,8 @@ def call_openai_family(content: str, sys_prompt: str) -> str:
     for line in answer.split('\n'):
         if not line.strip():
             continue
-        if 'summary' in line.lower() and len(line) <= 100:
+        # Skip a bare label line such as "Summary:" so the first real line is kept.
+        if 'summary' in line.lower() and line.rstrip().endswith(':'):
             continue
         answer = line
         break
diff --git a/hacker_news/news.py b/hacker_news/news.py
index 27bd64c..620cde3 100644
--- a/hacker_news/news.py
+++ b/hacker_news/news.py
@@ -80,6 +80,14 @@ def get_score(self) -> int:
         except:
             return 0
 
+    def is_hiring_job(self) -> bool:
+        """Return True when the item looks like a YC hiring post.
+
+        Heuristic: zero score, no author, and 'YC ' somewhere in the title.
+        """
+        return (self.get_score() == 0 and not self.author
+                and 'YC ' in (self.title or ''))  # title may be unset
+
     def slug(self):
         return slugify(self.title or 'no title')
 
@@ -130,7 +138,8 @@ def summarize_by_openai(self, content):
         if not openai.api_key:
             logger.info("OpenAI API key is not set")
             return ''
-        if self.get_score() < config.openai_score_threshold: # Avoid expensive openai
+        if (self.get_score() < config.openai_score_threshold # Avoid expensive openai
+                and not self.is_hiring_job()):
             logger.info("Score %d is too small, ignore openai", self.get_score())
             return ''
 
diff --git a/test/test_hackernews_parser.py b/test/test_hackernews_parser.py
index fa755ac..f343bd1 100644
--- a/test/test_hackernews_parser.py
+++ b/test/test_hackernews_parser.py
@@ -2,6 +2,7 @@
 from datetime import datetime, timedelta
 
 from hacker_news.algolia_api import get_news
+from hacker_news.news import News
 from hacker_news.parser import HackerNewsParser
 
 
@@ -52,3 +53,7 @@ def test_algolia_api(self):
         date = news_list[0].submit_time.date()
         for news in news_list:
             self.assertEqual(date, news.submit_time.date())
+
+    def test_maybe_jobs_post(self):
+        news = News(title='MixRank (YC S11) Is Hiring Software Engineers and Founders Globally')
+        self.assertTrue(news.is_hiring_job())