Skip to content

Commit

Permalink
Restore origin url after using jina proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
polyrabbit committed Jun 6, 2024
1 parent cc43bc5 commit e938b54
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
2 changes: 1 addition & 1 deletion hacker_news/llm/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def context_limit():
return 128 * 1024
if '32k' in model or 'mistral-7b' in model:
return 32 * 1024
if 'gemma' in model or 'llama' in model:
if 'gemma' in model or 'llama' in model or '8192' in model:
return 8 * 1024
return 4096

Expand Down
7 changes: 5 additions & 2 deletions page_content_extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
__all__ = ['ParseError', 'parser_factory']

logger = logging.getLogger(__name__)
jina_prefix = 'https://r.jina.ai/'


# dispatcher
Expand All @@ -29,6 +30,8 @@ def parser_factory(url, use_jina=False):
# Some sites like science.org forbid us by responding 403, but still have meta description tags, so do not raise here
if use_jina: # Switch to origin url
resp.raise_for_status()
url = url.removeprefix(jina_prefix)
resp.url = resp.url.removeprefix(jina_prefix)

if EmbeddableExtractor.is_embeddable(url):
logger.info('Get an embeddable to parse(%s)', resp.url)
Expand All @@ -51,9 +54,9 @@ def parser_factory(url, use_jina=False):
if not use_jina and p.is_empty():
logger.info('%s is empty? switch to jina', resp.url)
try:
return parser_factory('https://r.jina.ai/'+url, use_jina=True)
return parser_factory(jina_prefix+url, use_jina=True)
except Exception as e:
logger.warning('jina %s throws an error: %s', 'https://r.jina.ai/'+url, e)
logger.warning('jina %s throws an error: %s', jina_prefix+url, e)
return p

raise TypeError(f'I have no idea how the {ct} is formatted')
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ olefile
openai==0.28.1
torch==2.1.0
bert-extractive-summarizer==0.10.1
transformers==4.34.0
transformers==4.36.0
python-dotenv==1.0.0
python_slugify==8.0.1
sqlalchemy==2.0.21
Expand Down

0 comments on commit e938b54

Please sign in to comment.