mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 79417bebc6 | |||
| 61e7ae74bc | |||
| 935a3a15c5 | |||
| 08c50c1a63 | |||
| 8ecdd4245c |
@@ -0,0 +1,5 @@
|
||||
---
|
||||
"llama-cloud-services-py": minor
|
||||
---
|
||||
|
||||
Escape dollar signs in Markdown output when running in Jupyter notebooks, so that they are not interpreted as LaTeX equation delimiters.
|
||||
@@ -4,7 +4,10 @@ import re
|
||||
from pydantic import BaseModel, Field, SerializeAsAny
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from llama_cloud_services.parse.utils import make_api_request
|
||||
from llama_cloud_services.parse.utils import (
|
||||
make_api_request,
|
||||
is_jupyter,
|
||||
)
|
||||
from llama_index.core.async_utils import asyncio_run
|
||||
from llama_index.core.schema import Document, ImageDocument, ImageNode, TextNode
|
||||
|
||||
@@ -258,6 +261,24 @@ class JobResult(BaseModel):
|
||||
documents = await self.aget_text_documents(split_by_page)
|
||||
return [TextNode(text=doc.text, metadata=doc.metadata) for doc in documents]
|
||||
|
||||
def _format_markdown_for_notebook(self, text: Optional[str]) -> Optional[str]:
    """Escape dollar signs so a Jupyter notebook does not render them as LaTeX.

    Only dollar signs that are not already escaped are touched, which makes
    the operation idempotent: a ``$`` preceded by an odd number of
    backslashes (e.g. ``\\$``) is left as-is, while a ``$`` preceded by an
    even number (including zero) gets one backslash inserted in front of it.

    Args:
        text: The markdown text to format, or ``None``.

    Returns:
        The text with every unescaped ``$`` replaced by ``\\$``, or ``None``
        when ``text`` is ``None``.
    """
    if text is None:
        return None

    # Function-scope import keeps this method self-contained.
    import re

    # ``(?<!\\)`` anchors at the start of a backslash run, ``((?:\\\\)*)``
    # consumes whole pairs of backslashes, so the pattern only matches a
    # ``$`` that follows an even-length run, i.e. one that is still unescaped.
    # A blanket ``str.replace`` would turn an existing ``\$`` into ``\\$``,
    # which Markdown reads as an escaped backslash plus a live ``$``.
    return re.sub(r"(?<!\\)((?:\\\\)*)\$", r"\1\\$", text)
|
||||
|
||||
def get_markdown_documents(self, split_by_page: bool = False) -> List[Document]:
|
||||
"""
|
||||
Get the markdown documents from the job.
|
||||
@@ -268,17 +289,22 @@ class JobResult(BaseModel):
|
||||
if split_by_page:
|
||||
return [
|
||||
Document(
|
||||
text=page.md,
|
||||
text=self._format_markdown_for_notebook(page.md)
|
||||
if is_jupyter()
|
||||
else page.md,
|
||||
metadata={"page_number": page.page, "file_name": self.file_name},
|
||||
)
|
||||
for page in self.pages
|
||||
]
|
||||
else:
|
||||
text = self._page_separator.join(
|
||||
[page.md if page.md is not None else "" for page in self.pages]
|
||||
)
|
||||
return [
|
||||
Document(
|
||||
text=self._page_separator.join(
|
||||
[page.md if page.md is not None else "" for page in self.pages]
|
||||
),
|
||||
text=self._format_markdown_for_notebook(text)
|
||||
if is_jupyter()
|
||||
else text,
|
||||
metadata={"file_name": self.file_name},
|
||||
)
|
||||
]
|
||||
@@ -328,7 +354,10 @@ class JobResult(BaseModel):
|
||||
"""
|
||||
url = f"{self._base_url}/api/v1/parsing/job/{self.job_id}/result/raw/markdown"
|
||||
response = await make_api_request(self._client, "GET", url)
|
||||
return response.content.decode("utf-8")
|
||||
markdown = response.content.decode("utf-8")
|
||||
return (
|
||||
self._format_markdown_for_notebook(markdown) if is_jupyter() else markdown
|
||||
)
|
||||
|
||||
def get_text(self) -> str:
|
||||
"""
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import httpx
|
||||
import itertools
|
||||
import logging
|
||||
@@ -356,6 +357,17 @@ def partition_pages(
|
||||
return
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
def is_jupyter() -> bool:
    """Report whether the current process is running inside Jupyter.

    The active IPython shell's class name is compared against
    ``"ZMQInteractiveShell"``. The answer is memoized by ``lru_cache``, so
    the lookup runs at most once per process.

    Returns:
        ``True`` when the active shell's class is named
        ``ZMQInteractiveShell``; ``False`` otherwise, including when IPython
        cannot be imported or the lookup raises ``AttributeError``.
    """
    try:
        from IPython import get_ipython

        shell_class_name = get_ipython().__class__.__name__
    except (ImportError, AttributeError):
        # No usable IPython means this cannot be a notebook session.
        return False
    return shell_class_name == "ZMQInteractiveShell"
|
||||
|
||||
|
||||
def extract_tables_from_json_results(
|
||||
json_results: List[dict], download_path: str
|
||||
) -> List[str]:
|
||||
|
||||
Generated
+2
-2
@@ -1,5 +1,5 @@
|
||||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.9, <4.0"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.14'",
|
||||
@@ -1596,7 +1596,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "llama-cloud-services"
|
||||
version = "0.6.69"
|
||||
version = "0.6.70"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||
|
||||
Reference in New Issue
Block a user