Compare commits

...

4 Commits

Author SHA1 Message Date
pjames27 66a99d53d8 Unit test for _format_markdown_for_notebook
This test doesn't depend on getting real parse results; it only exercises a string-manipulation function, so it is inserted before the other tests. It should move to its own file if we add additional formatting configurations.
2025-10-06 21:06:15 -07:00
pjames27 323617e9e3 Patch bump 2025-10-06 19:06:29 -07:00
pjames27 f7477f137f Updated uv.lock via make lint 2025-10-06 19:03:42 -07:00
pjames27 dfdfe058ca Limit escaping to lone dollar signs - preserve double dollar for latex equations 2025-10-06 19:02:13 -07:00
4 changed files with 51 additions and 7 deletions
+5
View File
@@ -0,0 +1,5 @@
---
"llama-cloud-services-py": patch
---
Escape only single dollar signs; preserve double dollar signs (`$$`), which denote LaTeX equations.
+10 -5
View File
@@ -266,18 +266,23 @@ class JobResult(BaseModel):
if text is None:
return None
def escape_dollar_signs(text: str) -> str:
"""Escape dollar signs in text to prevent Jupyter from interpreting them as LaTeX.
def escape_single_dollar_signs(text: str) -> str:
"""Escape single dollar signs in text to prevent Jupyter from interpreting them as LaTeX.
Preserves all strings of dollar signs greater than length 1,
especially preserving double dollar signs ($$) which denote LaTeX equations.
Args:
text: The text to escape
Returns:
Text with dollar signs escaped
Text with single dollar signs escaped
"""
return text.replace("$", r"\$")
# Replace single $ with \$, but preserve $$
# Use negative lookahead and lookbehind to match $ not preceded or followed by $
return re.sub(r"(?<!\$)\$(?!\$)", r"\$", text)
return escape_dollar_signs(text)
return escape_single_dollar_signs(text)
def get_markdown_documents(self, split_by_page: bool = False) -> List[Document]:
"""
+34
View File
@@ -6,6 +6,40 @@ from llama_cloud_services import LlamaParse
from llama_cloud_services.parse.types import JobResult
def test_format_parse_result_markdown_for_notebook():
    """Check how ``_format_markdown_for_notebook`` treats dollar signs.

    The cases below expect a lone dollar sign to come back backslash-escaped,
    a run of two or more dollar signs to come back unchanged, and ``None`` to
    be returned as ``None``.
    """
    parsed = JobResult(job_id="test", file_name="test.pdf", job_result={})

    # None goes in, None comes out.
    assert parsed._format_markdown_for_notebook(None) is None

    # Each entry is (input markdown, expected formatted markdown).
    cases = [
        # A single dollar sign gets escaped.
        ("This costs $5", "This costs \\$5"),
        # Double dollar signs (LaTeX equations) are preserved.
        ("$$x^2 + y^2 = z^2$$", "$$x^2 + y^2 = z^2$$"),
        # Single and double dollar signs mixed in one string.
        (
            "This costs $5, but $$E = mc^2$$ is priceless",
            "This costs \\$5, but $$E = mc^2$$ is priceless",
        ),
        # Several single dollar signs.
        ("$10 and $20", "\\$10 and \\$20"),
        # Three or more consecutive dollar signs are preserved.
        ("$$$", "$$$"),
        # Double-dollar pairs with text in between, then a lone dollar sign.
        ("$$inline$$ and $separate", "$$inline$$ and \\$separate"),
    ]
    for raw, expected in cases:
        assert parsed._format_markdown_for_notebook(raw) == expected, raw
@pytest.fixture
def file_path() -> str:
    """Return the relative path string of the sample PDF under tests/test_files."""
    sample_pdf = "tests/test_files/attention_is_all_you_need.pdf"
    return sample_pdf
Generated
+2 -2
View File
@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9, <4.0"
resolution-markers = [
"python_full_version >= '3.14'",
@@ -1596,7 +1596,7 @@ wheels = [
[[package]]
name = "llama-cloud-services"
version = "0.6.72"
version = "0.6.73"
source = { editable = "." }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },