From 9167f75ea77fead3669002bcbdb413e090bf7c80 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 9 Apr 2024 14:13:35 -0600 Subject: [PATCH] update to docs --- .github/workflows/publish-docs.yml | 3 +- docs/index.md | 43 +++++++++++++++++- mkdocs.yml | 70 ++++++++++++++++++++++++++++-- requirements-dev.txt | 3 +- requirements-docs.txt | 6 +++ 5 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 requirements-docs.txt diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 27d8a56..8e75fd4 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -7,7 +7,6 @@ on: permissions: contents: write jobs: - deploy: runs-on: ubuntu-latest steps: @@ -26,5 +25,5 @@ jobs: path: .cache restore-keys: | mkdocs-material- - - run: pip install mkdocs-material + - run: pip install -r requirements-docs.txt - run: mkdocs gh-deploy --force diff --git a/docs/index.md b/docs/index.md index 22df78b..a997792 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,6 +20,8 @@ Open Parse is designed to fill this gap by providing a flexible, easy-to-use lib ## Quick Start +## Basic Example + ```python import openparse @@ -31,8 +33,45 @@ for node in parsed_basic_doc.nodes: print(node) ``` +**📓 Try the sample notebook** here + + +## Semantic Processing Example + +Chunking documents is fundamentally about grouping similar semantic nodes together. By embedding the text of each node, we can then cluster them together based on their similarity. + +```python +from openparse import processing, DocumentParser + +semantic_pipeline = processing.SemanticIngestionPipeline( + openai_api_key=OPEN_AI_KEY, + model="text-embedding-3-large", + min_tokens=64, + max_tokens=1024, +) +parser = DocumentParser( + processing_pipeline=semantic_pipeline, +) +parsed_content = parser.parse(basic_doc_path) +``` + +**📓 Sample notebook** here +
-**📓 Try the sample notebook** here -

+ +## Cookbooks + +https://github.com/Filimoa/open-parse/tree/main/src/cookbooks + + +## Sponsors + + + + + + + +Does your use case need something special? Reach [out](https://www.linkedin.com/in/sergey-osu/). diff --git a/mkdocs.yml b/mkdocs.yml index 9a6eb65..42b7ffd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,18 +1,69 @@ site_name: Open Parse +site_author: Sergey Filimonov +repo_url: "https://github.com/Filimoa/open-parse/" +repo_name: "open-parse" +site_url: "https://github.com/Filimoa/open-parse/" theme: name: material features: - content.code.copy - + palette: + - scheme: default + primary: black + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + font: + text: Roboto + code: Roboto Mono markdown_extensions: + - abbr - admonition + - pymdownx.details + - attr_list + - def_list + - footnotes + - md_in_html + - toc: + permalink: true + - pymdownx.arithmatex: + generic: true + - pymdownx.betterem: + smart_enable: all + - pymdownx.caret + - pymdownx.details + - pymdownx.emoji: + emoji_generator: !!python/name:material.extensions.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji - pymdownx.highlight: anchor_linenums: true line_spans: __span pygments_lang_class: true - pymdownx.inlinehilite - - pymdownx.snippets - - pymdownx.superfences + - pymdownx.keys + - pymdownx.mark + - pymdownx.smartsymbols + - pymdownx.snippets: + auto_append: + - includes/mkdocs.md + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + combine_header_slug: true + - pymdownx.tasklist: + custom_checkbox: true + nav: - Home: index.md - Parsing Text: @@ -28,3 +79,16 @@ nav: - Customization: processing/customization.md - Serializing Results: serialization.md - Visualization: visualization.md + +plugins: + - search: + separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' + - minify: + minify_html: true + - mkdocstrings: + handlers: + python: + options: + members_order: alphabetical + allow_inspection: true + show_bases: true diff --git a/requirements-dev.txt b/requirements-dev.txt index a8e5fa8..fa133dd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,5 @@ -r requirements.txt +-r requirements-docs.txt pytest ruff mypy @@ -8,5 +9,3 @@ beautifulsoup4 twine packaging wheel -mkdocs-material -mkdocs-material-extensions diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 0000000..ab326d9 --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,6 @@ +mkdocs-material +mkdocs-material-extensions +mkdocstrings-python +mkdocs-jupyter +pymdown-extensions +mkdocs-minify-plugin