Compare commits

...

5 Commits

Author SHA1 Message Date
pjames27 79417bebc6 Create brave-moments-type.md 2025-10-03 13:25:50 -07:00
pjames27 61e7ae74bc Running make lint 2025-10-03 13:21:57 -07:00
pjames27 935a3a15c5 Adding memoization to avoid re-importing and catching 2025-10-03 13:17:26 -07:00
pjames27 08c50c1a63 Removing unnecessary print 2025-10-03 13:10:44 -07:00
pjames27 8ecdd4245c Escaping dollar signs in markdown output when running in a jupyter notebook 2025-10-03 12:33:19 -07:00
4 changed files with 54 additions and 8 deletions
+5
View File
@@ -0,0 +1,5 @@
---
"llama-cloud-services-py": minor
---
Escape dollar signs in markdown output when running in a Jupyter notebook, to prevent them from being interpreted as equation delimiters
+35 -6
View File
@@ -4,7 +4,10 @@ import re
from pydantic import BaseModel, Field, SerializeAsAny
from typing import Dict, Any, List, Optional
from llama_cloud_services.parse.utils import make_api_request
from llama_cloud_services.parse.utils import (
make_api_request,
is_jupyter,
)
from llama_index.core.async_utils import asyncio_run
from llama_index.core.schema import Document, ImageDocument, ImageNode, TextNode
@@ -258,6 +261,24 @@ class JobResult(BaseModel):
documents = await self.aget_text_documents(split_by_page)
return [TextNode(text=doc.text, metadata=doc.metadata) for doc in documents]
def _format_markdown_for_notebook(self, text: Optional[str]) -> Optional[str]:
"""Format markdown text for Jupyter notebook display by escaping dollar signs."""
if text is None:
return None
def escape_dollar_signs(text: str) -> str:
"""Escape dollar signs in text to prevent Jupyter from interpreting them as LaTeX.
Args:
text: The text to escape
Returns:
Text with dollar signs escaped
"""
return text.replace("$", r"\$")
return escape_dollar_signs(text)
def get_markdown_documents(self, split_by_page: bool = False) -> List[Document]:
"""
Get the markdown documents from the job.
@@ -268,17 +289,22 @@ class JobResult(BaseModel):
if split_by_page:
return [
Document(
text=page.md,
text=self._format_markdown_for_notebook(page.md)
if is_jupyter()
else page.md,
metadata={"page_number": page.page, "file_name": self.file_name},
)
for page in self.pages
]
else:
text = self._page_separator.join(
[page.md if page.md is not None else "" for page in self.pages]
)
return [
Document(
text=self._page_separator.join(
[page.md if page.md is not None else "" for page in self.pages]
),
text=self._format_markdown_for_notebook(text)
if is_jupyter()
else text,
metadata={"file_name": self.file_name},
)
]
@@ -328,7 +354,10 @@ class JobResult(BaseModel):
"""
url = f"{self._base_url}/api/v1/parsing/job/{self.job_id}/result/raw/markdown"
response = await make_api_request(self._client, "GET", url)
return response.content.decode("utf-8")
markdown = response.content.decode("utf-8")
return (
self._format_markdown_for_notebook(markdown) if is_jupyter() else markdown
)
def get_text(self) -> str:
"""
+12
View File
@@ -1,3 +1,4 @@
import functools
import httpx
import itertools
import logging
@@ -356,6 +357,17 @@ def partition_pages(
return
@functools.lru_cache(maxsize=1)
def is_jupyter() -> bool:
    """Report whether the current interpreter is a Jupyter (ZMQ) kernel.

    The answer is computed once and memoized, so the IPython import and the
    shell lookup are not repeated on later calls.

    Returns:
        ``True`` when the active IPython shell class is named
        ``ZMQInteractiveShell``; ``False`` otherwise, including when IPython
        cannot be imported.
    """
    try:
        from IPython import get_ipython

        shell_name = get_ipython().__class__.__name__
    except (ImportError, AttributeError):
        # IPython missing or unusable: treat as "not a notebook".
        return False
    return shell_name == "ZMQInteractiveShell"
def extract_tables_from_json_results(
json_results: List[dict], download_path: str
) -> List[str]:
Generated
+2 -2
View File
@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9, <4.0"
resolution-markers = [
"python_full_version >= '3.14'",
@@ -1596,7 +1596,7 @@ wheels = [
[[package]]
name = "llama-cloud-services"
version = "0.6.69"
version = "0.6.70"
source = { editable = "." }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },