diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml
index 27d8a56..8e75fd4 100644
--- a/.github/workflows/publish-docs.yml
+++ b/.github/workflows/publish-docs.yml
@@ -7,7 +7,6 @@ on:
permissions:
contents: write
jobs:
-
deploy:
runs-on: ubuntu-latest
steps:
@@ -26,5 +25,5 @@ jobs:
path: .cache
restore-keys: |
mkdocs-material-
- - run: pip install mkdocs-material
+ - run: pip install -r requirements-docs.txt
- run: mkdocs gh-deploy --force
diff --git a/docs/index.md b/docs/index.md
index 22df78b..a997792 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -20,6 +20,8 @@ Open Parse is designed to fill this gap by providing a flexible, easy-to-use lib
## Quick Start
+## Basic Example
+
```python
import openparse
@@ -31,8 +33,45 @@ for node in parsed_basic_doc.nodes:
print(node)
```
+**📓 Try the sample notebook** here
+
+
+## Semantic Processing Example
+
+Chunking documents is fundamentally about grouping semantically similar nodes together. By embedding the text of each node, we can cluster nodes based on their similarity.
+
+```python
+from openparse import processing, DocumentParser
+
+semantic_pipeline = processing.SemanticIngestionPipeline(
+ openai_api_key=OPEN_AI_KEY,
+ model="text-embedding-3-large",
+ min_tokens=64,
+ max_tokens=1024,
+)
+parser = DocumentParser(
+ processing_pipeline=semantic_pipeline,
+)
+parsed_content = parser.parse(basic_doc_path)
+```
+
+**📓 Sample notebook** here
+
-**📓 Try the sample notebook** here
-
+
+## Cookbooks
+
+[Browse the cookbooks on GitHub](https://github.com/Filimoa/open-parse/tree/main/src/cookbooks)
+
+
+## Sponsors
+
+
+
+
+
+
+
+Does your use case need something special? [Reach out](https://www.linkedin.com/in/sergey-osu/).
diff --git a/mkdocs.yml b/mkdocs.yml
index 9a6eb65..42b7ffd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,18 +1,69 @@
site_name: Open Parse
+site_author: Sergey Filimonov
+repo_url: "https://github.com/Filimoa/open-parse/"
+repo_name: "open-parse"
+site_url: "https://filimoa.github.io/open-parse/"
theme:
name: material
features:
- content.code.copy
-
+ palette:
+ - scheme: default
+ primary: black
+ accent: indigo
+ toggle:
+ icon: material/brightness-7
+ name: Switch to dark mode
+ - scheme: slate
+ primary: black
+ accent: indigo
+ toggle:
+ icon: material/brightness-4
+ name: Switch to light mode
+ font:
+ text: Roboto
+ code: Roboto Mono
markdown_extensions:
+ - abbr
- admonition
+ - pymdownx.details
+ - attr_list
+ - def_list
+ - footnotes
+ - md_in_html
+ - toc:
+ permalink: true
+ - pymdownx.arithmatex:
+ generic: true
+ - pymdownx.betterem:
+ smart_enable: all
+ - pymdownx.caret
+ - pymdownx.details
+ - pymdownx.emoji:
+ emoji_generator: !!python/name:material.extensions.emoji.to_svg
+ emoji_index: !!python/name:material.extensions.emoji.twemoji
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
- pymdownx.inlinehilite
- - pymdownx.snippets
- - pymdownx.superfences
+ - pymdownx.keys
+ - pymdownx.mark
+ - pymdownx.smartsymbols
+ - pymdownx.snippets:
+ auto_append:
+ - includes/mkdocs.md
+ - pymdownx.superfences:
+ custom_fences:
+ - name: mermaid
+ class: mermaid
+ format: !!python/name:pymdownx.superfences.fence_code_format
+ - pymdownx.tabbed:
+ alternate_style: true
+ combine_header_slug: true
+ - pymdownx.tasklist:
+ custom_checkbox: true
+
nav:
- Home: index.md
- Parsing Text:
@@ -28,3 +79,16 @@ nav:
- Customization: processing/customization.md
- Serializing Results: serialization.md
- Visualization: visualization.md
+
+plugins:
+ - search:
+ separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
+ - minify:
+ minify_html: true
+ - mkdocstrings:
+ handlers:
+ python:
+ options:
+ members_order: alphabetical
+ allow_inspection: true
+ show_bases: true
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a8e5fa8..fa133dd 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,5 @@
-r requirements.txt
+-r requirements-docs.txt
pytest
ruff
mypy
@@ -8,5 +9,3 @@ beautifulsoup4
twine
packaging
wheel
-mkdocs-material
-mkdocs-material-extensions
diff --git a/requirements-docs.txt b/requirements-docs.txt
new file mode 100644
index 0000000..ab326d9
--- /dev/null
+++ b/requirements-docs.txt
@@ -0,0 +1,6 @@
+mkdocs-material
+mkdocs-material-extensions
+mkdocstrings-python
+mkdocs-jupyter
+pymdown-extensions
+mkdocs-minify-plugin