mirror of
https://github.com/run-llama/template-workflow-data-extraction.git
synced 2026-07-01 21:34:19 -04:00
Update all of the templates to remove test-proj, and migrate from vibe-llama templates
This commit is contained in:
@@ -1,69 +0,0 @@
|
||||
# CI workflow with three independent jobs: one runs the template regeneration
# check, one runs the Python checks in test-proj, one runs the UI checks in
# test-proj/ui.
name: Check Template Regeneration

# Runs on pushes to main and on pull requests targeting main.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  # Runs the `check-regeneration` command of copier/copy_utils.py.
  check-template:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'

    - name: Install uv
      uses: astral-sh/setup-uv@v3

    - name: Run regeneration check
      run: uv run copier/copy_utils.py check-regeneration

  # Runs the hatch `all-check` script from inside the materialized project.
  check-python:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'

    - name: Install uv
      uses: astral-sh/setup-uv@v3

    - name: Run Python checks
      run: uv run hatch run all-check
      working-directory: test-proj

  # Runs the pnpm `all-check` script from inside the materialized UI project.
  check-ui:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '24'

    - name: Enable Corepack
      run: corepack enable

    # NOTE(review): no version is passed here; presumably corepack reads the
    # package manager version from test-proj/ui/package.json - confirm.
    - name: Activate pnpm version
      working-directory: test-proj/ui
      run: corepack prepare --activate


    - name: Run UI checks
      run: pnpm run all-check
      working-directory: test-proj/ui
|
||||
+6
-10
@@ -1,11 +1,7 @@
|
||||
test-proj/uv.lock
|
||||
.venv
|
||||
uv.lock
|
||||
expected-proj
|
||||
test-proj/.venv
|
||||
test-proj/ui/node_modules
|
||||
test-proj/ui/pnpm-lock.yaml
|
||||
ui/pnpm-lock.yaml
|
||||
ui/package-lock.json
|
||||
test-proj/ui/package-lock.json
|
||||
.env
|
||||
__pycache__
|
||||
workflows.db
|
||||
|
||||
.venv
|
||||
package-lock.json
|
||||
node_modules
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
.env
|
||||
__pycache__
|
||||
workflows.db
|
||||
@@ -1,87 +0,0 @@
|
||||
# Contributor Workflow
|
||||
|
||||
This project uses a "materialized" approach for template development. You make changes to a generated project (`test-proj/`) and then use a script to copy those changes back into the template source. This allows for a more interactive development experience.
|
||||
|
||||
The core script for this workflow is `copier/copy_utils.py`.
|
||||
|
||||
## Development Steps
|
||||
|
||||
### 1. Initial Setup
|
||||
|
||||
Ensure your repository is in a clean state (no uncommitted changes). Regenerate the `test-proj` directory to ensure it's synchronized with the latest version of the template.
|
||||
|
||||
```bash
|
||||
# Regenerate test-proj from the template
|
||||
uv run --script copier/copy_utils.py regenerate
|
||||
```
|
||||
|
||||
### 2. Develop and Test in `test-proj`
|
||||
|
||||
Make all your desired code changes directly within the `test-proj/` directory. Treat it as a standard project: run development servers, add dependencies, and test your changes live.
|
||||
|
||||
```bash
|
||||
# Example: Work on the UI
|
||||
cd test-proj/ui
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Run validation checks to ensure your changes are correct before propagating them.
|
||||
|
||||
```bash
|
||||
# From the project root directory
|
||||
./copier/copy_utils.py check-python
|
||||
# or within test-proj/, run `uv run hatch run all` or the individual script commands, such as `uv run hatch run format`
|
||||
./copier/copy_utils.py check-javascript
|
||||
# or within test-proj/ui/, run `npm run all`, or the individual script commands such as `npm run format`
|
||||
```
|
||||
|
||||
### 3. Commit Your Development Work
|
||||
|
||||
Once you are satisfied with your changes in `test-proj`, it's a good idea to commit them so that you can revert if something goes wrong.
|
||||
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "WIP: Implement new feature in test-proj"
|
||||
```
|
||||
|
||||
### 4. Propagate Changes to the Template
|
||||
|
||||
Use the `fix-template` command to automatically copy your changes from `test-proj` back into the template source files. It compares `test-proj` against what the current template would generate, showing only meaningful differences.
|
||||
|
||||
```bash
|
||||
# Check what would change (recommended first step)
|
||||
./copier/copy_utils.py check-template
|
||||
|
||||
# Apply changes automatically
|
||||
./copier/copy_utils.py check-template --fix
|
||||
|
||||
# Or fix everything from the materialized project
|
||||
./copier/copy_utils.py check-template --fix-format
|
||||
```
|
||||
|
||||
`fix-template` provides:
|
||||
|
||||
- **Automatic Jinja Resolution**: Resolves simple template variable changes (project names, versions, etc.)
|
||||
- **Gitignore Respect**: Only considers files that would be tracked by git, ignoring build artifacts
|
||||
- **Selective Copying**: Copies non-templated files and auto-resolved template files back to the template
|
||||
|
||||
### 5. Handle Remaining Manual Updates
|
||||
|
||||
For complex `.jinja` files that can't be auto-resolved, you'll need manual intervention:
|
||||
|
||||
1. Open the modified file in `test-proj` (e.g., `test-proj/pyproject.toml`)
|
||||
2. Open the corresponding template file (e.g., `pyproject.toml.jinja`)
|
||||
3. Carefully apply the changes, ensuring you retain or add the necessary Jinja templating logic
|
||||
|
||||
The tool will indicate which files need manual resolution.
|
||||
|
||||
### 6. Verify Template Integrity
|
||||
|
||||
After propagating all changes, verify that the template is consistent by running the `check-regeneration` command. This command regenerates `test-proj` and checks for any differences.
|
||||
|
||||
```bash
|
||||
uv run --script copier/copy_utils.py check-regeneration
|
||||
```
|
||||
|
||||
If this command reports any differences, it indicates that a change was not correctly propagated to the template. You will need to identify the discrepancy, fix the template file(s), and run the check again. A successful run will report no differences.
|
||||
@@ -1,5 +0,0 @@
|
||||
# Getting Started
|
||||
|
||||
This is a copier template. Run it with `uvx copier copy gh:run-llama/template-workflow-data-extraction <name>`, follow the web prompts to log in, and proceed. After completion, you should have a directory created under `<name>`; open it and follow its `README.md` to get running.
|
||||
|
||||
If this template is updated later, you can merge those updates into your project by running `uvx copier update -A` from within the project directory.
|
||||
+1
-9
@@ -17,12 +17,4 @@ project_title:
|
||||
project_name_snake:
|
||||
type: str
|
||||
default: "{{ project_name.replace('-', '_') }}"
|
||||
when: false
|
||||
|
||||
_exclude:
|
||||
- "test-proj"
|
||||
- ".git"
|
||||
- ".github"
|
||||
- "copier"
|
||||
- "CONTRIBUTING.md"
|
||||
- "copier.yaml"
|
||||
when: false
|
||||
@@ -1,867 +0,0 @@
|
||||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# dependencies=[
|
||||
# "copier",
|
||||
# "click",
|
||||
# "pyyaml",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import warnings
|
||||
|
||||
# Suppress deprecation warnings from copier
|
||||
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
||||
|
||||
import os
|
||||
import difflib
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import click
|
||||
import yaml
|
||||
from rich.console import Console
|
||||
|
||||
import copier
|
||||
from copier._template import Template
|
||||
from copier.errors import DirtyLocalWarning
|
||||
|
||||
warnings.filterwarnings("ignore", category=DirtyLocalWarning)
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def run_copier_quietly(src_path: str, dst_path: str, data: Dict[str, str]) -> None:
    """Invoke ``copier.run_copy`` for the given template and destination.

    Args:
        src_path: Location of the template to copy from.
        dst_path: Directory the project is generated into.
        data: Answers forwarded to copier as its ``data`` argument.

    The call always passes ``unsafe=True``, ``quiet=True`` and
    ``vcs_ref="HEAD"``.
    """
    copy_options = {
        "src_path": src_path,
        "dst_path": dst_path,
        "data": data,
        "unsafe": True,
        "quiet": True,
        "vcs_ref": "HEAD",
    }
    copier.run_copy(**copy_options)
|
||||
|
||||
|
||||
def render_jinja_string(
    template_string: str, variables: Dict[str, str], script_dir: Path
) -> str:
    """Render ``template_string`` with a Jinja environment configured like the template's.

    Args:
        template_string: Jinja source text to render.
        variables: Names made available to the template during rendering.
        script_dir: Directory handed to copier's ``Template`` so that its
            ``jinja_extensions`` and ``envops`` can be reused.

    Returns:
        The rendered text.
    """
    copier_template = Template(url=str(script_dir))

    import jinja2

    environment = jinja2.Environment(
        loader=jinja2.BaseLoader(),
        extensions=copier_template.jinja_extensions,
        **copier_template.envops,
    )
    compiled = environment.from_string(template_string)
    return compiled.render(**variables)
|
||||
|
||||
|
||||
def parse_template_variables() -> Dict[str, str]:
    """Build the variable context for the template from the materialized project.

    Reads ``test-proj/.copier-answers.yml`` (relative to the directory two
    levels above this file), drops every key that starts with ``_``, and then
    fills in any question from the template that has a ``default`` but no
    answer yet. Defaults containing ``{{`` are rendered with
    ``render_jinja_string`` against the values collected so far.

    Returns:
        Mapping of variable name to value: the stored answers plus every
        default that could be evaluated.

    Raises:
        OSError: If the answers file cannot be opened.
    """
    script_dir = Path(__file__).parent.parent

    # Read answers from existing materialized project
    test_proj = script_dir / "test-proj"
    answers_file = test_proj / ".copier-answers.yml"

    with open(answers_file, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat that as "no answers"
        answers_data = yaml.safe_load(f) or {}

    # Filter out copier metadata
    user_answers = {k: v for k, v in answers_data.items() if not k.startswith("_")}

    # Get template configuration for variable parsing
    template = Template(url=str(script_dir))

    # Build complete variable context by evaluating template defaults
    result = dict(user_answers)

    # Multiple passes to handle dependencies between computed variables
    max_iterations = 10
    for _ in range(max_iterations):
        changed = False
        for question_name, question_config in template.questions_data.items():
            if question_name in result or "default" not in question_config:
                continue

            default_value = question_config["default"]
            if isinstance(default_value, str) and "{{" in default_value:
                # Evaluate Jinja expression using our helper
                try:
                    rendered = render_jinja_string(default_value, result, script_dir)
                except Exception:
                    # Deliberate best-effort: the expression may depend on a
                    # variable that is only computed in a later pass.
                    continue
                result[question_name] = rendered
            else:
                result[question_name] = default_value
            changed = True

        # Stop if no new variables were computed
        if not changed:
            break

    return result
|
||||
|
||||
|
||||
## Removed simple line-based resolver in favor of chunk-based approach
|
||||
|
||||
|
||||
def _line_has_jinja_markers(line: str) -> bool:
|
||||
"""Return True if the line appears to contain Jinja syntax."""
|
||||
return ("{{" in line) or ("{%" in line) or ("{#" in line)
|
||||
|
||||
|
||||
def _build_expected_to_template_index_map(
|
||||
template_lines: List[str], expected_lines: List[str]
|
||||
) -> Dict[int, int]:
|
||||
"""Build a best-effort map from expected line index to template line index.
|
||||
|
||||
Uses difflib to align the current template with its rendered expected output.
|
||||
The map records, for each expected index, a nearby template index anchor.
|
||||
"""
|
||||
matcher = difflib.SequenceMatcher(
|
||||
None, template_lines, expected_lines, autojunk=False
|
||||
)
|
||||
mapping: Dict[int, int] = {}
|
||||
|
||||
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
||||
if tag == "equal":
|
||||
# Direct 1:1 alignment for equal blocks
|
||||
span = min(i2 - i1, j2 - j1)
|
||||
for k in range(span):
|
||||
mapping[j1 + k] = i1 + k
|
||||
else:
|
||||
# For changed regions, map expected indices to the closest template index boundary
|
||||
# Use i1 as the anchor template position for the whole expected block [j1, j2)
|
||||
for j in range(j1, j2):
|
||||
mapping.setdefault(j, i1)
|
||||
|
||||
# Also provide a fallback mapping for indexes beyond the last aligned block
|
||||
if expected_lines:
|
||||
last_expected_index = len(expected_lines)
|
||||
last_template_index = len(template_lines)
|
||||
mapping.setdefault(last_expected_index, last_template_index)
|
||||
|
||||
return mapping
|
||||
|
||||
|
||||
def attempt_chunk_based_jinja_resolution(
    template_file: Path, expected_content: str, actual_content: str
) -> Optional[str]:
    """Attempt a general chunk-based resolution using difflib hunks.

    - Compute opcodes between expected and actual (materialized) contents
    - Map expected indexes to template indexes via a separate template↔expected alignment
    - Apply inserts/deletes/replaces to the template cautiously, skipping regions that contain Jinja markers
    - Validate by regenerating and comparing

    Args:
        template_file: Template source file to patch; read as UTF-8.
        expected_content: Text generated from the current template.
        actual_content: Text currently present in the materialized project.

    Returns:
        The proposed template text (ending in a newline) when at least one
        hunk was applied and ``validate_auto_resolved_template`` accepted it.
        ``None`` when the template file does not exist, no hunk was applied,
        or validation failed.
    """
    if not template_file.exists():
        return None

    with open(template_file, "r", encoding="utf-8") as f:
        template_content = f.read()

    template_lines = template_content.splitlines()
    expected_lines = expected_content.splitlines()
    actual_lines = actual_content.splitlines()

    # Map expected indexes to template indexes using alignment between template and expected
    exp_to_tpl = _build_expected_to_template_index_map(template_lines, expected_lines)

    # Compute hunks between expected and actual
    matcher = difflib.SequenceMatcher(
        None, expected_lines, actual_lines, autojunk=False
    )

    # Work on a mutable copy of template lines
    new_template_lines = list(template_lines)
    delta_offset = 0  # track shifts due to prior insertions/deletions in template list

    def tpl_index_from_expected(exp_index: int) -> int:
        # Return closest known template index; default to end if missing
        # NOTE(review): the fallback len(new_template_lines) already reflects
        # earlier edits, yet delta_offset is still added on top - confirm
        # whether len(template_lines) was intended.
        return exp_to_tpl.get(exp_index, len(new_template_lines)) + delta_offset

    def safe_region_has_jinja(t_start: int, t_end: int) -> bool:
        # Check any Jinja markers in the region that would be modified
        # (the range is clamped to the current bounds of new_template_lines)
        for t in range(max(0, t_start), min(len(new_template_lines), t_end)):
            if _line_has_jinja_markers(new_template_lines[t]):
                return True
        return False

    changes_made = False

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue

        # i1/i2 index expected_lines; translate them to positions in the
        # (already edited) template list.
        tpl_start = tpl_index_from_expected(i1)
        tpl_end = tpl_index_from_expected(i2)

        # Guard: if the region touches Jinja markers, skip this hunk entirely
        # For inserts, check only the insertion point's immediate neighbors
        if tag in ("replace", "delete"):
            if safe_region_has_jinja(tpl_start, tpl_end):
                continue

        if tag == "insert":
            insert_lines = actual_lines[j1:j2]
            # Only insert when the immediate context is free of Jinja markers
            left_ctx = max(0, tpl_start - 1)
            right_ctx = min(len(new_template_lines), tpl_start + 1)
            if any(
                _line_has_jinja_markers(new_template_lines[t])
                for t in range(left_ctx, right_ctx)
            ):
                continue
            new_template_lines[tpl_start:tpl_start] = insert_lines
            delta_offset += len(insert_lines)
            changes_made = True
        elif tag == "delete":
            # Delete corresponding template region
            del_count = max(0, tpl_end - tpl_start)
            if del_count > 0:
                del new_template_lines[tpl_start:tpl_end]
                delta_offset -= del_count
                changes_made = True
        elif tag == "replace":
            replacement_lines = actual_lines[j1:j2]
            del_count = max(0, tpl_end - tpl_start)
            # Replace the region
            new_template_lines[tpl_start:tpl_end] = replacement_lines
            delta_offset += len(replacement_lines) - del_count
            changes_made = True

    if not changes_made:
        return None

    proposed_content = "\n".join(new_template_lines)

    # Ensure a single trailing newline for stability across environments
    if not proposed_content.endswith("\n"):
        proposed_content += "\n"

    # Validate the proposed content produces the actual content
    script_dir = Path(__file__).parent.parent
    if validate_auto_resolved_template(
        script_dir, template_file, proposed_content, actual_content
    ):
        return proposed_content

    return None
|
||||
|
||||
|
||||
def validate_auto_resolved_template(
    script_dir: Path,
    template_file: Path,
    resolved_content: str,
    expected_materialized_content: str,
) -> bool:
    """Validate that auto-resolved template produces expected output.

    The candidate content is written over ``template_file``, the template is
    generated into a temporary directory, and the generated counterpart of
    ``template_file`` is compared (ignoring leading/trailing whitespace) with
    ``expected_materialized_content``. The template file is always put back
    into its prior state afterwards: its previous content is restored, or the
    file is removed again if it did not exist before the call.

    Args:
        script_dir: Root directory of the template.
        template_file: Template file to overwrite temporarily; must be
            located under ``script_dir``.
        resolved_content: Candidate template content to try out.
        expected_materialized_content: Content the generated file should have.

    Returns:
        True if validation passes, False otherwise (including when any
        exception is raised while generating or comparing).
    """
    # Save current template content (None means "file did not exist")
    original_content: Optional[str] = None
    if template_file.exists():
        with open(template_file, "r", encoding="utf-8") as f:
            original_content = f.read()

    try:
        # Write resolved content temporarily
        template_file.parent.mkdir(parents=True, exist_ok=True)
        with open(template_file, "w", encoding="utf-8") as f:
            f.write(resolved_content)

        # Parsed once and reused for both generation and path rendering
        variables = parse_template_variables()

        # Test regeneration in a temp directory
        with tempfile.TemporaryDirectory() as temp_dir:
            test_proj = Path(temp_dir) / "validation-proj"

            run_copier_quietly(str(script_dir), str(test_proj), variables)

            # Reverse of map_materialized_to_template_path: drop a trailing
            # ".jinja" and render any Jinja expressions in the path itself.
            relative_template_path = template_file.relative_to(script_dir)
            materialized_path_str = str(relative_template_path).removesuffix(".jinja")
            materialized_path_str = render_jinja_string(
                materialized_path_str, variables, script_dir
            )

            materialized_file = test_proj / materialized_path_str
            if not materialized_file.exists():
                return False

            # Compare content
            with open(materialized_file, "r", encoding="utf-8") as f:
                validation_actual = f.read()

            return validation_actual.strip() == expected_materialized_content.strip()

    except Exception:
        # Any failure while generating or comparing counts as "not valid"
        return False
    finally:
        if original_content is not None:
            # Restore the original content, including a legitimately empty file
            with open(template_file, "w", encoding="utf-8") as f:
                f.write(original_content)
        else:
            # The file was created only for this validation; do not leave it behind
            template_file.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def run_git_command(
    cmd: List[str], cwd: Optional[Path] = None
) -> subprocess.CompletedProcess[str]:
    """Execute ``cmd`` and hand back the completed process.

    The command line is echoed to the console first. Output is captured as
    text. If the command exits with a non-zero status, the exit code, stdout
    and stderr are printed and the interpreter exits with status 1.

    Args:
        cmd: Command and arguments to execute.
        cwd: Working directory for the command, or None for the current one.

    Returns:
        The ``CompletedProcess`` of the successful run.
    """
    console.print(f"Running: {' '.join(cmd)}")
    try:
        return subprocess.run(
            cmd, cwd=cwd, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as failure:
        report = (
            (f"Command failed with exit code {failure.returncode}", "bold red"),
            (f"stdout: {failure.stdout}", "bold yellow"),
            (f"stderr: {failure.stderr}", "bold yellow"),
        )
        for message, message_style in report:
            console.print(message, style=message_style)
        sys.exit(1)
|
||||
|
||||
|
||||
def get_git_tracked_files(directory: Path, respect_gitignore: bool = True) -> set[Path]:
    """Collect the relative paths of the files under ``directory``.

    With ``respect_gitignore`` False, every regular file found by a recursive
    walk is returned. Otherwise the list comes from
    ``git ls-files --others --cached --exclude-standard`` run inside
    ``directory``, keeping only entries that are regular files. In both modes
    files named ``.copier-answers.yml`` are left out.

    Args:
        directory: Directory to inspect.
        respect_gitignore: Whether to ask git for the file list.

    Returns:
        Set of paths relative to ``directory``.

    Raises:
        subprocess.CalledProcessError: If the git command exits non-zero.
    """
    # Files to always ignore
    skip_names = {".copier-answers.yml"}

    if not respect_gitignore:
        # Plain recursive walk, no git involved
        return {
            entry.relative_to(directory)
            for entry in directory.rglob("*")
            if entry.is_file() and entry.name not in skip_names
        }

    # Ask git which files it tracks or would track (honours .gitignore)
    listing = subprocess.run(
        ["git", "ls-files", "--others", "--cached", "--exclude-standard"],
        cwd=directory,
        capture_output=True,
        text=True,
        check=True,
    )

    found = set()
    for raw_line in listing.stdout.strip().split("\n"):
        name = raw_line.strip()
        if not name:
            continue
        relative = Path(name)
        if (directory / name).is_file() and relative.name not in skip_names:
            found.add(relative)

    return found
|
||||
|
||||
|
||||
def _read_text_if_utf8(path: Path) -> Optional[str]:
    """Return the file's content decoded as UTF-8, or None if it is not valid UTF-8."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        return None


def _normalize_newline_end(text: str) -> str:
    """Force exactly one trailing newline so newline-only differences are ignored."""
    return text.rstrip("\n") + "\n"


def compare_directories(expected_dir: Path, actual_dir: Path) -> List[str]:
    """Compare two directories and return list of files that differ, respecting gitignore.

    The expected directory is listed without consulting git; the actual
    directory is listed through git so ignored files are excluded. A
    directory that does not exist contributes no files.

    Text files are compared after normalizing trailing newlines. Files that
    are not valid UTF-8 are compared byte-for-byte instead of raising.

    Args:
        expected_dir: Freshly generated reference directory.
        actual_dir: Directory being checked.

    Returns:
        Messages of the form ``"Missing file: <path>"``,
        ``"Extra file: <path>"`` and ``"Content differs: <path>"``, each
        group sorted by path so the output is deterministic.
    """
    expected_files = (
        get_git_tracked_files(expected_dir, respect_gitignore=False)
        if expected_dir.exists()
        else set()
    )
    actual_files = (
        get_git_tracked_files(actual_dir, respect_gitignore=True)
        if actual_dir.exists()
        else set()
    )

    differences: List[str] = []

    # Files only in expected
    for file_path in sorted(expected_files - actual_files):
        differences.append(f"Missing file: {file_path}")

    # Files only in actual
    for file_path in sorted(actual_files - expected_files):
        differences.append(f"Extra file: {file_path}")

    # Files present on both sides
    for file_path in sorted(expected_files & actual_files):
        expected_file = expected_dir / file_path
        actual_file = actual_dir / file_path

        expected_content = _read_text_if_utf8(expected_file)
        actual_content = _read_text_if_utf8(actual_file)

        if expected_content is None or actual_content is None:
            # At least one side is binary: fall back to an exact byte comparison
            if expected_file.read_bytes() != actual_file.read_bytes():
                differences.append(f"Content differs: {file_path}")
            continue

        if _normalize_newline_end(expected_content) != _normalize_newline_end(
            actual_content
        ):
            differences.append(f"Content differs: {file_path}")

    return differences
|
||||
|
||||
|
||||
def compare_with_expected_materialized(
    script_dir: Path, fix_mode: bool = False
) -> None:
    """Compare current test-proj with freshly generated template.

    The template in ``script_dir`` is generated into a temporary directory
    and compared with ``script_dir / "test-proj"`` via ``compare_directories``.
    Each reported difference is printed. Files whose content differs, and
    files present only in test-proj, are then sorted into two groups:

    - ``files_to_copy``: tuples of ``(relative_path, template_path,
      actual_file, template_file, auto_resolved_content)``. For auto-resolved
      ``.jinja`` templates ``actual_file`` is None and
      ``auto_resolved_content`` holds the new template text; for plain files
      it is the other way round.
    - ``files_needing_manual_fix``: tuples of ``(file_path, template_path)``
      for ``.jinja`` templates that could not be resolved automatically.

    "Missing file" differences are only listed; they are not categorized.

    Args:
        script_dir: Root directory of the template.
        fix_mode: When True, write/copy the ``files_to_copy`` entries into the
            template; when False, only print what would be done.
    """

    with console.status(
        "[bold green]Generating expected materialized version from current template..."
    ):
        with tempfile.TemporaryDirectory() as temp_dir:
            expected_proj = Path(temp_dir) / "expected-proj"

            # Generate expected materialized version
            run_copier_quietly(
                str(script_dir),
                str(expected_proj),
                parse_template_variables(),
            )

            # Compare expected vs actual
            test_proj_dir = script_dir / "test-proj"
            differences = compare_directories(expected_proj, test_proj_dir)

            if not differences:
                console.print(
                    "✅ test-proj matches expected template output", style="bold green"
                )
                return

            console.print(
                f"\n❌ Found {len(differences)} differences between expected and actual:",
                style="bold red",
            )
            for diff in differences:
                console.print(f" {diff}")

            files_to_copy = []
            files_needing_manual_fix = []

            # For files that differ in content, show detailed diff and categorize
            console.print("\nDetailed differences:")
            for diff in differences:
                if diff.startswith("Content differs: "):
                    file_path = diff[len("Content differs: ") :]
                    expected_file = expected_proj / file_path
                    actual_file = test_proj_dir / file_path

                    # Determine corresponding template file path
                    template_file_path = map_materialized_to_template_path(
                        script_dir, str(file_path)
                    )
                    template_file = script_dir / template_file_path

                    # Read file contents for auto-resolution.
                    # Unreadable or non-UTF-8 files leave both contents as
                    # None, which disables auto-resolution below.
                    try:
                        with open(expected_file, "r", encoding="utf-8") as f:
                            expected_content = f.read()
                        with open(actual_file, "r", encoding="utf-8") as f:
                            actual_content = f.read()
                    except (UnicodeDecodeError, PermissionError):
                        expected_content = None
                        actual_content = None

                    console.print(f"\n--- Expected (from template): {file_path}")
                    console.print(f"+++ Actual (in test-proj): {file_path}")

                    # Use git diff for better output
                    # NOTE(review): subprocess.run is called without
                    # check=True, so CalledProcessError does not appear to be
                    # raised here and the except branch looks unreachable.
                    try:
                        result = subprocess.run(
                            [
                                "git",
                                "diff",
                                "--no-index",
                                str(expected_file),
                                str(actual_file),
                            ],
                            capture_output=True,
                            text=True,
                            cwd=script_dir,
                        )
                        # git diff returns 1 when files differ, which is expected
                        if result.stdout:
                            # Skip the file headers and show just the content diff
                            lines = result.stdout.split("\n")
                            for line in lines[4:]:  # Skip first 4 lines (headers)
                                if line.strip():
                                    console.print(f" {line}")
                    except subprocess.CalledProcessError:
                        # Fallback to basic diff indication
                        console.print(" (Files differ)")

                    # Categorize for fixing
                    if template_file_path.endswith(".jinja"):
                        # Try auto-resolution first
                        # (skipped when either content is None or empty)
                        auto_resolved_content = None
                        if expected_content and actual_content:
                            auto_resolved_content = (
                                attempt_chunk_based_jinja_resolution(
                                    template_file, expected_content, actual_content
                                )
                            )

                        if auto_resolved_content:
                            # Accept the auto-resolution (our logic is conservative enough)
                            if fix_mode:
                                console.print(
                                    f" ✓ Auto-resolved: {template_file_path}"
                                )
                            else:
                                console.print(
                                    f" ✓ Would auto-resolve: {template_file_path}"
                                )
                            files_to_copy.append(
                                (
                                    str(file_path),
                                    template_file_path,
                                    None,
                                    template_file,
                                    auto_resolved_content,
                                )
                            )
                        else:
                            files_needing_manual_fix.append(
                                (file_path, template_file_path)
                            )
                    else:
                        # Non-templated file: copied verbatim in fix mode
                        files_to_copy.append(
                            (
                                str(file_path),
                                template_file_path,
                                actual_file,
                                template_file,
                                None,
                            )
                        )

                elif diff.startswith("Extra file: "):
                    file_path = diff[len("Extra file: ") :]
                    actual_file = test_proj_dir / file_path

                    # Determine corresponding template file path
                    template_file_path = map_materialized_to_template_path(
                        script_dir, str(file_path)
                    )
                    template_file = script_dir / template_file_path

                    console.print(f"\nExtra file in test-proj: {file_path}")

                    # Categorize for fixing
                    if template_file_path.endswith(".jinja"):
                        # For extra files, we can't auto-resolve without expected content
                        files_needing_manual_fix.append((file_path, template_file_path))
                    else:
                        files_to_copy.append(
                            (
                                str(file_path),
                                template_file_path,
                                actual_file,
                                template_file,
                                None,
                            )
                        )

    # Provide guidance and optionally fix (outside temp directory)
    if fix_mode:
        # Actually fix the files
        if files_to_copy:
            console.print(f"\nCopying {len(files_to_copy)} files back to template:")
            for (
                relative_path,
                template_path,
                actual_file,
                template_file,
                auto_resolved_content,
            ) in files_to_copy:
                console.print(f"Copying {relative_path} → {template_path}")
                template_file.parent.mkdir(parents=True, exist_ok=True)

                if auto_resolved_content:
                    # Write auto-resolved jinja content
                    with open(template_file, "w", encoding="utf-8") as f:
                        f.write(auto_resolved_content)
                else:
                    # Copy regular file
                    shutil.copy2(actual_file, template_file)

        if files_needing_manual_fix:
            console.print(
                f"\n⚠️ {len(files_needing_manual_fix)} templated files need manual resolution:"
            )
            for materialized_path, template_path in files_needing_manual_fix:
                console.print(f" {materialized_path} → {template_path}")
    else:
        # In check mode, just show what would happen
        if files_to_copy or files_needing_manual_fix:
            console.print("\nWould make the following changes:")
            if files_to_copy:
                console.print(f" Copy {len(files_to_copy)} files back to template")
            if files_needing_manual_fix:
                console.print(
                    f" {len(files_needing_manual_fix)} files need manual resolution"
                )
            console.print("\nTo apply changes, run: fix-template")
|
||||
|
||||
|
||||
def map_materialized_to_template_path(script_dir: Path, materialized_path: str) -> str:
    """Translate a path inside the generated project into its template path.

    A path that starts with ``src/<project_name_snake>/`` has that second
    component replaced by the literal ``{{ project_name_snake }}``. In every
    case, if ``<path>.jinja`` exists under ``script_dir`` that name is
    returned, otherwise the path without the suffix.

    Args:
        script_dir: Root directory of the template.
        materialized_path: Path relative to the generated project.

    Returns:
        Path relative to ``script_dir``.
    """
    segments: tuple[str, ...] = Path(materialized_path).parts

    # The package directory is named after a computed variable
    snake_name = parse_template_variables().get("project_name_snake", "test_proj")
    in_package_dir = (
        len(segments) >= 2 and segments[0] == "src" and segments[1] == snake_name
    )

    if in_package_dir:
        # Put the template variable back in place of the computed name
        base_path: str = str(
            Path("src", "{{ project_name_snake }}", *segments[2:])
        )
    else:
        base_path = materialized_path

    # Prefer the .jinja variant whenever the template has one
    jinja_candidate: str = base_path + ".jinja"
    if (script_dir / jinja_candidate).exists():
        return jinja_candidate
    return base_path
|
||||
|
||||
|
||||
# Root click command group; the subcommands in this file attach to it via
# `@cli.command()`. The docstring below is left as-is because click uses a
# function's docstring as its help text.
@click.group()
def cli() -> None:
    """Template validation and fixing tools."""
    pass
|
||||
|
||||
|
||||
def regenerate_test_proj(script_dir: Path) -> None:
    """Delete ``script_dir / "test-proj"`` (if present) and generate it again.

    When the directory exists, the template variables are parsed from it
    before it is removed; otherwise an empty mapping is passed to copier.
    After generation, ``git restore`` is attempted on the new
    ``.copier-answers.yml``; a failure of that command is ignored.

    Args:
        script_dir: Root directory of the template.
    """
    target_dir: Path = script_dir / "test-proj"

    if target_dir.exists():
        # Answers must be read before the directory disappears
        variables = parse_template_variables()
        console.print(f"Deleting {target_dir}")
        shutil.rmtree(target_dir)
    else:
        variables = {}
        console.print(f"Directory {target_dir} does not exist")

    # Generate the project afresh
    with console.status("[bold green]Running copier to regenerate test-proj..."):
        run_copier_quietly(str(script_dir), str(target_dir), variables)

    # The answers file is rewritten with new revision info; put the committed one back
    answers_file = target_dir / ".copier-answers.yml"
    if not answers_file.exists():
        return
    try:
        run_git_command(["git", "restore", str(answers_file)], cwd=script_dir)
    except SystemExit:
        # git restore failed (e.g. the file is not tracked) - carry on
        pass
|
||||
|
||||
|
||||
def get_script_dir_and_setup() -> Path:
    """Switch the process into the directory above this script's folder.

    Shared first step of every command.

    Returns:
        The directory two levels above this file, which is also the new
        current working directory.
    """
    this_file = Path(__file__)
    root: Path = this_file.parent.parent
    os.chdir(root)
    console.print(f"Working directory: {root}")
    return root
|
||||
|
||||
|
||||
def ensure_test_proj_exists(script_dir: Path) -> Path:
    """Return ``script_dir / "test-proj"``, exiting with status 1 if it is missing.

    Args:
        script_dir: Directory expected to contain ``test-proj``.

    Returns:
        Path to the existing ``test-proj`` directory.

    Raises:
        SystemExit: With code 1 (after printing an error) when the path does
            not exist.
    """
    candidate: Path = script_dir / "test-proj"
    if candidate.exists():
        return candidate

    console.print(
        "Error: test-proj directory does not exist. Run 'regenerate' first.",
        style="bold red",
    )
    sys.exit(1)
|
||||
|
||||
|
||||
@cli.command()
def regenerate() -> None:
    """Regenerate test-proj directory using copier."""
    root: Path = get_script_dir_and_setup()

    # Refuse to run on a dirty tree: regeneration deletes test-proj, and any
    # pending edits would become indistinguishable from regenerated output.
    console.print("Checking for uncommitted changes...")
    porcelain = run_git_command(["git", "status", "--porcelain"], cwd=root).stdout
    if porcelain.strip():
        console.print(
            "Error: Repository has uncommitted changes. Please commit or stash them first.",
            style="bold red",
        )
        console.print(porcelain)
        sys.exit(1)

    regenerate_test_proj(root)
    console.print("✓ test-proj regenerated")
|
||||
|
||||
|
||||
@cli.command()
def check_regeneration() -> None:
    """Regenerate test-proj from the template and fail if git reports any changes."""
    # The previous help text said this command "assumes test-proj already
    # exists"; in fact it always deletes and re-renders test-proj first, so the
    # text above now describes the destructive behaviour.
    script_dir: Path = get_script_dir_and_setup()

    regenerate_test_proj(script_dir)

    # A clean `git status` after regeneration means the files in the
    # repository are exactly what the template produces.
    console.print("Checking generated files against template...")
    git_status: subprocess.CompletedProcess[str] = run_git_command(
        ["git", "status", "--porcelain"], cwd=script_dir
    )

    if not git_status.stdout.strip():
        console.print("✓ Generated files match template")
        return

    console.print("\n❌ Generated files do not match template!", style="bold red")
    console.print("\nFiles that differ:")
    console.print(git_status.stdout)

    console.print("\nDifferences:")
    git_diff: subprocess.CompletedProcess[str] = run_git_command(
        ["git", "diff"], cwd=script_dir
    )
    console.print(git_diff.stdout)

    console.print(
        "\nTo fix: If these changes look good, likely you just need to run regenerate and commit the changes.",
        style="bold red",
    )
    sys.exit(1)
|
||||
|
||||
|
||||
def run_python_checks(test_proj_dir: Path, fix: bool) -> None:
    """Run Python validation checks on test-proj using hatch.

    Args:
        test_proj_dir: Directory in which the hatch script is executed.
        fix: When true run the ``all-fix`` script, otherwise ``all-check``.
    """
    hatch_script = "all-fix" if fix else "all-check"
    console.print("Running Python validation checks...")
    # run_git_command is used here as a generic subprocess runner.
    run_git_command(["uv", "run", "hatch", "run", hatch_script], cwd=test_proj_dir)
    console.print("✓ Python checks passed")
|
||||
|
||||
|
||||
@cli.command("check-python")
@click.option("--fix", is_flag=True, help="Fix formatting issues automatically.")
def check_python(fix: bool) -> None:
    """Run Python validation checks on test-proj using hatch."""
    # Set up the working directory, require test-proj, then delegate.
    project = ensure_test_proj_exists(get_script_dir_and_setup())
    run_python_checks(project, fix)
|
||||
|
||||
|
||||
def run_javascript_checks(test_proj_dir: Path, fix: bool) -> None:
    """Run TypeScript and format validation checks on test-proj/ui using npm.

    Args:
        test_proj_dir: The materialized project; its ``ui`` subdirectory is checked.
        fix: When true run the ``all-fix`` npm script, otherwise ``all-check``.

    Raises:
        SystemExit: With code 1 when the ``ui`` directory does not exist.
    """
    frontend: Path = test_proj_dir / "ui"
    if not frontend.exists():
        console.print("Error: test-proj/ui directory does not exist.", style="bold red")
        sys.exit(1)

    npm_script = "all-fix" if fix else "all-check"
    console.print("Running TypeScript validation checks...")
    # run_git_command is used here as a generic subprocess runner.
    run_git_command(["npm", "run", npm_script], cwd=frontend)
    console.print("✓ TypeScript checks passed")
|
||||
|
||||
|
||||
@cli.command("check-javascript")
@click.option("--fix", is_flag=True, help="Fix formatting issues automatically.")
def check_javascript(fix: bool) -> None:
    """Run TypeScript and format validation checks on test-proj/ui using npm."""
    # Set up the working directory, require test-proj, then delegate.
    project = ensure_test_proj_exists(get_script_dir_and_setup())
    run_javascript_checks(project, fix)
|
||||
|
||||
|
||||
@cli.command()
@click.option(
    "--fix",
    is_flag=True,
    help="Fix template files by copying back changes from materialized test-proj.",
)
@click.option(
    "--fix-format",
    is_flag=True,
    help="Run Python and JavaScript formatters before fixing template files. Implies --fix.",
)
def check_template(fix: bool, fix_format: bool) -> None:
    """Compare test-proj with what the current template would generate.

    Pass --fix to copy changes from test-proj back into the template files.
    Pass --fix-format to run the Python and JavaScript formatters on
    test-proj first (implies --fix).
    """
    # The previous help text pointed at a `--check` flag that this command
    # does not define; the text above documents the options that exist.
    script_dir: Path = get_script_dir_and_setup()

    # --fix-format implies --fix.
    fix = fix or fix_format

    test_proj_dir: Path = ensure_test_proj_exists(script_dir)

    # Format first so the comparison below sees the formatted sources.
    if fix_format:
        run_python_checks(test_proj_dir, fix=True)
        run_javascript_checks(test_proj_dir, fix=True)

    compare_with_expected_materialized(script_dir, fix_mode=fix)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
@@ -1,5 +0,0 @@
|
||||
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
|
||||
_commit: c778ba5
|
||||
_src_path: .
|
||||
project_name: test-proj
|
||||
project_title: Test Proj
|
||||
@@ -1,2 +0,0 @@
|
||||
# copy to .env and place any needed secrets here. LLAMA_CLOUD_API_KEY will be automatically set
|
||||
# OPENAI_API_KEY=sk-xxx
|
||||
@@ -1,3 +0,0 @@
|
||||
.env
|
||||
__pycache__
|
||||
workflows.db
|
||||
@@ -1,33 +0,0 @@
|
||||
# Data Extraction and Ingestion
|
||||
|
||||
This is a starter for a [`llama_deploy`](https://github.com/run-llama/cloud_llama_deploy) powered app.
|
||||
See its [getting started guide](https://github.com/run-llama/cloud_llama_deploy/tree/main/docs/guides/01-getting-started.md)
|
||||
for more info on how to run this project.
|
||||
|
||||
The backend contains a single workflow that runs LlamaCloud Extraction, given your schema. The frontend exposes an
|
||||
extraction review UI, where you can review and correct extractions.
|
||||
|
||||
## Customizing the schema
|
||||
|
||||
The starter contains a placeholder `MySchema` that is used for extraction. See [`schemas.py`](./src/test_proj/schemas.py).
|
||||
|
||||
You should customize this `schemas.py` for your use case to modify the extracted data. You can also rename the schema from `MySchema` to
|
||||
something more appropriate for your use case. Do a find and replace on "MySchema" to also fix the frontend references.
|
||||
|
||||
The frontend has a copy of the schema as a JSON schema, which it uses to introspect and generate an editing UI. Run `uv run export-types` to regenerate the frontend JSON schema.
|
||||
|
||||
## Customizing the application
|
||||
|
||||
This is meant to just be a starting place. You can add more workflows, and trigger them from the UI. For example, you could
|
||||
add functionality to sync to a downstream data sink to export the corrected data after review. Or you could add a workflow
|
||||
that monitors a data source, and automatically triggers the extraction against the file.
|
||||
|
||||
### Running Workflows
|
||||
|
||||
The core value of this template is good extraction. The main python code is in the `src` directory.
|
||||
|
||||
Workflows can be triggered from the UI using `useWorkflow` react hooks from the `@llamaindex/ui` library.
|
||||
You can also add a `if __name__ == "__main__":` handler to individual workflows to run and debug them directly.
|
||||
The `process_file.py` module has a main handler that will upload a `test.pdf` from your current working directory so you
|
||||
can test your extraction directly. Note, while `llamactl serve` will inject your LlamaCloud API key, you will need
|
||||
to set a `LLAMA_CLOUD_API_KEY` in your `.env` file or set an env var to run the `__main__` handler directly.
|
||||
@@ -1,53 +0,0 @@
|
||||
[project]
|
||||
name = "test-proj"
|
||||
version = "0.1.0"
|
||||
description = "Extracts data"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"llama-cloud-services>=0.6.69",
|
||||
"llama-index-workflows>=2.2.0,<3.0.0",
|
||||
"python-dotenv>=1.1.0",
|
||||
"jsonref>=1.1.0",
|
||||
"click>=8.2.1,<8.3.0",
|
||||
"httpx>=0.28.1",
|
||||
"llama-index-core>=0.14.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
export-types = "test_proj.export_types:export_types"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ruff>=0.11.10",
|
||||
"typescript>=0.0.12",
|
||||
"ty>=0.0.1a16",
|
||||
"pytest>=8.4.1",
|
||||
"hatch>=1.14.1",
|
||||
"llamactl>=0.3.0"
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.envs.default.scripts]
|
||||
"format" = "ruff format ."
|
||||
"format-check" = "ruff format --check ."
|
||||
"lint" = "ruff check --fix ."
|
||||
"lint-check" = ["ruff check ."]
|
||||
typecheck = "ty check src"
|
||||
test = "pytest"
|
||||
"all-check" = ["format-check", "lint-check", "test"]
|
||||
"all-fix" = ["format", "lint", "test"]
|
||||
|
||||
[tool.llamadeploy]
|
||||
env_files = [".env"]
|
||||
llama_cloud = true
|
||||
|
||||
[tool.llamadeploy.workflows]
|
||||
process-file = "test_proj.process_file:workflow"
|
||||
|
||||
[tool.llamadeploy.ui]
|
||||
directory = "ui"
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
import functools
|
||||
import os
|
||||
import httpx
|
||||
|
||||
import dotenv
|
||||
from llama_cloud_services import ExtractionAgent, LlamaExtract
|
||||
from llama_cloud_services.extract import ExtractConfig, ExtractMode
|
||||
from llama_cloud.core.api_error import ApiError
|
||||
from llama_cloud_services.beta.agent_data import AsyncAgentDataClient, ExtractedData
|
||||
from llama_cloud.client import AsyncLlamaCloud
|
||||
from .schemas import MySchema
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# deployed agents may infer their name from the deployment name
|
||||
# Note: Make sure that an agent deployment with this name actually exists
|
||||
# otherwise calls to get or set data will fail. You may need to adjust the `or `
|
||||
# name for development
|
||||
agent_name = os.getenv("LLAMA_DEPLOY_DEPLOYMENT_NAME")
|
||||
agent_name_or_default = agent_name or "test-proj"
|
||||
# required for all llama cloud calls
|
||||
api_key = os.environ["LLAMA_CLOUD_API_KEY"]
|
||||
# get this in case running against a different environment than production
|
||||
base_url = os.getenv("LLAMA_CLOUD_BASE_URL")
|
||||
extracted_data_collection = "test-proj"
|
||||
project_id = os.getenv("LLAMA_DEPLOY_PROJECT_ID")
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_extract_agent() -> ExtractionAgent:
    # Returns the extraction agent named `agent_name_or_default`, creating it
    # when the lookup answers 404. The result is cached for the process
    # lifetime by lru_cache, so the lookup/creation happens once.
    extraction_config = ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        system_prompt=None,
        # advanced
        use_reasoning=False,
        cite_sources=False,
        confidence_scores=True,
    )
    llama_extract = LlamaExtract(
        api_key=api_key, base_url=base_url, project_id=project_id
    )
    try:
        agent = llama_extract.get_agent(agent_name_or_default)
        # Point the existing agent at the current schema and config.
        agent.data_schema = MySchema
        agent.config = extraction_config
        return agent
    except ApiError as err:
        if err.status_code != 404:
            raise
        # No agent with that name yet: create it.
        return llama_extract.create_agent(
            name=agent_name_or_default, data_schema=MySchema, config=extraction_config
        )
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_data_client() -> AsyncAgentDataClient:
    # Cached client for the extracted-data collection, built on the shared
    # LlamaCloud client.
    cloud_client = get_llama_cloud_client()
    # update MySchema for your schema, but retain the ExtractedData container
    record_type = ExtractedData[MySchema]
    return AsyncAgentDataClient(
        deployment_name=agent_name,
        collection=extracted_data_collection,
        type=record_type,
        client=cloud_client,
    )
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_llama_cloud_client():
    # Cached AsyncLlamaCloud client. The underlying httpx client uses a 60s
    # timeout and sends a Project-Id header only when a project id is set.
    if project_id:
        default_headers = {"Project-Id": project_id}
    else:
        default_headers = None
    transport = httpx.AsyncClient(timeout=60, headers=default_headers)
    return AsyncLlamaCloud(
        base_url=base_url,
        token=api_key,
        httpx_client=transport,
    )
|
||||
@@ -1,80 +0,0 @@
|
||||
"""
|
||||
Script to export pydantic types from a python file (default "schemas.py") to json schemas and then to typescript interfaces.
|
||||
|
||||
For sharing types precisely between python and typescript
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import ModuleType
|
||||
|
||||
import jsonref
|
||||
from pydantic import BaseModel
|
||||
import click
|
||||
|
||||
|
||||
def run_command(cmd: str):
    """Run ``cmd`` through the shell and abort the process if it fails.

    Args:
        cmd: Shell command line, passed to ``subprocess.run(..., shell=True)``.

    Raises:
        SystemExit: With the command's return code when that code is non-zero.
    """
    status = subprocess.run(cmd, shell=True).returncode
    if status == 0:
        return
    print(f"Command failed: {cmd}", file=sys.stderr)
    sys.exit(status)
|
||||
|
||||
|
||||
@click.command()
@click.option(
    "--schema-file",
    default="schemas.py",
    help="The name of the model file to export types from",
)
def export_types(schema_file: str):
    # Exports the models in `schema_file` (looked up next to this module) as
    # JSON schemas into <app>/ui/src/schemas, then generates TypeScript
    # interfaces from them. The output directory is wiped first.
    package_dir = Path(__file__).parent
    app_root = package_dir.parent.parent
    print("Exporting types...")

    source = package_dir / schema_file
    if not source.exists():
        raise click.BadParameter(f"Schema file '{schema_file}' not found in app")
    print(f"Exporting types from {source}...")

    # Start from an empty directory so stale schemas do not linger.
    schemas_out = app_root / "ui" / "src" / "schemas"
    if schemas_out.exists():
        shutil.rmtree(schemas_out)
    os.makedirs(schemas_out)

    export_schemas(source, schemas_out)
    generate_typescript_interfaces(schemas_out)
|
||||
|
||||
|
||||
def generate_typescript_interfaces(schema_dir: Path):
    """Generate TypeScript interfaces for every ``*.json`` schema in ``schema_dir``.

    Runs ``json-schema-to-typescript`` through ``npx``; the generated files are
    written back into ``schema_dir``.

    Args:
        schema_dir: Directory holding the JSON schemas and receiving the output.
    """
    import shlex

    # Both paths go through the shell, so quote them: previously the output
    # path was interpolated bare and the glob was wrapped in hand-written
    # single quotes, which breaks for directories containing spaces or quotes.
    # The glob stays quoted so the tool, not the shell, expands it.
    input_glob = shlex.quote(str(schema_dir / "*.json"))
    output_dir = shlex.quote(str(schema_dir))
    run_command(
        f"npx -y json-schema-to-typescript@15.0.4 -i {input_glob} -o {output_dir} --additionalProperties=false"
    )
|
||||
|
||||
|
||||
def load_module_from_path(module_name: str, file_path: Path) -> ModuleType:
    """Import the Python source file at ``file_path`` under the name ``module_name``.

    Args:
        module_name: Name the module is registered under in ``sys.modules``.
        file_path: Location of the source file to execute.

    Returns:
        The executed module object.

    Raises:
        ValueError: If no import spec/loader can be built for ``file_path``.
        Exception: Whatever the module's own top-level code raises.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ValueError(f"Failed to load module from {file_path}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks the module up in
    # sys.modules while it is being defined can find it.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        sys.modules.pop(module_name, None)
        raise
    return module
|
||||
|
||||
|
||||
def export_schemas(py_file: Path, output_dir: Path):
    """Write one ``<ClassName>.json`` schema per pydantic model found in ``py_file``.

    Args:
        py_file: Python source file to load and scan for ``BaseModel`` subclasses.
        output_dir: Directory receiving the JSON files (created if missing).
    """
    stem = os.path.splitext(os.path.basename(py_file))[0]
    loaded = load_module_from_path(stem, py_file)
    os.makedirs(output_dir, exist_ok=True)

    for cls_name, member in inspect.getmembers(loaded):
        if not inspect.isclass(member):
            continue
        # Keep pydantic models only, and not BaseModel itself.
        if member is BaseModel or not issubclass(member, BaseModel):
            continue
        # Inline every $ref so each exported schema is self-contained.
        flattened = jsonref.replace_refs(member.model_json_schema(), proxies=False)
        destination = os.path.join(output_dir, f"{cls_name}.json")
        with open(destination, "w") as handle:
            handle.write(json.dumps(flattened, indent=2))
        print(f"Exported {cls_name} to {cls_name}.json")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
export_types()
|
||||
@@ -1,208 +0,0 @@
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
from typing import Any, Literal
|
||||
|
||||
import httpx
|
||||
from llama_cloud import ExtractRun
|
||||
from llama_cloud_services.extract import SourceText
|
||||
from llama_cloud_services.beta.agent_data import ExtractedData, InvalidExtractionData
|
||||
from workflows import Context, Workflow, step
|
||||
from workflows.events import Event, StartEvent, StopEvent
|
||||
from workflows.retry_policy import ConstantDelayRetryPolicy
|
||||
|
||||
from .config import get_llama_cloud_client, get_data_client, get_extract_agent
|
||||
from .schemas import MySchema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Start event of the workflow: carries the id of the file to process.
class FileEvent(StartEvent):
    file_id: str
|
||||
|
||||
|
||||
# Emitted by run_file and consumed by download_file.
class DownloadFileEvent(Event):
    file_id: str
|
||||
|
||||
|
||||
# Emitted by download_file once the file content has been written locally.
class FileDownloadedEvent(Event):
    file_id: str
    file_path: str  # local path the content was written to
    filename: str  # name reported by the file metadata
|
||||
|
||||
|
||||
# Progress/error notification written to the workflow's event stream.
class UIToast(Event):
    level: Literal["info", "warning", "error"]
    message: str
|
||||
|
||||
|
||||
# Extraction result that validated against MySchema.
class ExtractedEvent(Event):
    data: ExtractedData[MySchema]
|
||||
|
||||
|
||||
# Extraction result that failed schema validation; the payload is kept as a plain dict.
class ExtractedInvalidEvent(Event):
    data: ExtractedData[dict[str, Any]]
|
||||
|
||||
|
||||
class ProcessFileWorkflow(Workflow):
    """
    Given a file path, this workflow will process a single file through the custom extraction logic.
    """

    # Event flow: FileEvent -> DownloadFileEvent -> FileDownloadedEvent ->
    # ExtractedEvent | ExtractedInvalidEvent -> StopEvent.
    # Every step is retried with a constant 10s delay, up to 3 attempts, and
    # reports failures to the UI through a UIToast before re-raising.

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def run_file(self, event: FileEvent) -> DownloadFileEvent:
        # Entry step: log the request and hand the file id to the download step.
        logger.info(f"Running file {event.file_id}")
        return DownloadFileEvent(file_id=event.file_id)

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def download_file(
        self, event: DownloadFileEvent, ctx: Context
    ) -> FileDownloadedEvent:
        """Download the file reference from the cloud storage"""
        try:
            cloud = get_llama_cloud_client()
            file_metadata = await cloud.files.get_file(id=event.file_id)
            file_url = await cloud.files.read_file_content(event.file_id)

            filename = file_metadata.name
            # Only the last path component is used for the local copy, so a
            # name containing directory parts cannot escape (or fail to resolve
            # inside) the temp directory. The event keeps the original name.
            local_name = os.path.basename(filename) or event.file_id
            file_path = os.path.join(tempfile.gettempdir(), local_name)
            logger.info(f"Downloading file {file_url.url} to {file_path}")

            # The client is a context manager so its connections are released
            # even when the download fails.
            async with httpx.AsyncClient() as client:
                async with client.stream("GET", file_url.url) as response:
                    # Fail (and let the retry policy kick in) rather than
                    # saving an HTTP error page as the document.
                    response.raise_for_status()
                    with open(file_path, "wb") as f:
                        async for chunk in response.aiter_bytes():
                            f.write(chunk)
            logger.info(f"Downloaded file {file_url.url} to {file_path}")
            return FileDownloadedEvent(
                file_id=event.file_id, file_path=file_path, filename=filename
            )
        except Exception as e:
            logger.error(f"Error downloading file {event.file_id}: {e}", exc_info=True)
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error downloading file {event.file_id}: {e}",
                )
            )
            raise

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def process_file(
        self, event: FileDownloadedEvent, ctx: Context
    ) -> ExtractedEvent | ExtractedInvalidEvent:
        """Runs the extraction against the file"""
        try:
            agent = get_extract_agent()
            # track the content of the file, so as to be able to de-duplicate
            file_content = Path(event.file_path).read_bytes()
            file_hash = hashlib.sha256(file_content).hexdigest()
            source_text = SourceText(
                file=event.file_path,
                filename=event.filename,
            )
            logger.info(f"Extracting data from file {event.filename}")
            ctx.write_event_to_stream(
                UIToast(
                    level="info", message=f"Extracting data from file {event.filename}"
                )
            )
            extracted_result: ExtractRun = await agent.aextract(source_text)
            try:
                logger.info(f"Extracted data: {extracted_result}")
                data = ExtractedData.from_extraction_result(
                    result=extracted_result,
                    schema=MySchema,
                    file_hash=file_hash,
                )
                return ExtractedEvent(data=data)
            except InvalidExtractionData as e:
                # Schema validation failed: still pass the raw item on so it
                # gets recorded.
                logger.error(f"Error validating extracted data: {e}", exc_info=True)
                return ExtractedInvalidEvent(data=e.invalid_item)
        except Exception as e:
            logger.error(
                f"Error extracting data from file {event.filename}: {e}",
                exc_info=True,
            )
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error extracting data from file {event.filename}: {e}",
                )
            )
            raise

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def record_extracted_data(
        self, event: ExtractedEvent | ExtractedInvalidEvent, ctx: Context
    ) -> StopEvent:
        """Records the extracted data to the agent data API"""
        try:
            data_client = get_data_client()
            # remove past data when reprocessing the same file
            if event.data.file_hash:
                existing_data = await data_client.search(
                    filter={
                        "file_hash": {
                            "eq": event.data.file_hash,
                        },
                    },
                )
                if existing_data.items:
                    logger.info(
                        f"Removing past data for file {event.data.file_name} with hash {event.data.file_hash}"
                    )
                    await asyncio.gather(
                        *[
                            data_client.delete_item(item.id)
                            for item in existing_data.items
                        ]
                    )
            # finally, save the new data
            item_id = await data_client.create_item(event.data)

            # Announce success only after the write went through; previously
            # the "Recorded" log and toast were emitted before anything was
            # stored, including on attempts that then failed.
            logger.info(f"Recorded extracted data for file {event.data.file_name}")
            ctx.write_event_to_stream(
                UIToast(
                    level="info",
                    message=f"Recorded extracted data for file {event.data.file_name}",
                )
            )
            return StopEvent(
                result=item_id.id,
            )
        except Exception as e:
            logger.error(
                f"Error recording extracted data for file {event.data.file_name}: {e}",
                exc_info=True,
            )
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error recording extracted data for file {event.data.file_name}: {e}",
                )
            )
            raise
|
||||
|
||||
|
||||
workflow = ProcessFileWorkflow(timeout=None)
|
||||
|
||||
if __name__ == "__main__":
    # Debug entry point: upload ./test.pdf and run the workflow on it.
    from dotenv import load_dotenv

    load_dotenv()
    logging.basicConfig(level=logging.INFO)

    async def main():
        # Open the PDF in a `with` block so the handle is closed once the
        # upload finishes (it was previously left open).
        with Path("test.pdf").open("rb") as pdf:
            file = await get_llama_cloud_client().files.upload_file(upload_file=pdf)
        await workflow.run(start_event=FileEvent(file_id=file.id))

    asyncio.run(main())
|
||||
@@ -1,7 +0,0 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Rename and extend this, and then regenerate the js types with `uv run export-types`
|
||||
# Additional schemas can be added here to share them between python and typescript
|
||||
class MySchema(BaseModel):
    # Placeholder field; replace with the fields you want extracted.
    hello: str
|
||||
@@ -1,2 +0,0 @@
|
||||
def test_placeholder():
    # Intentionally empty placeholder test.
    pass
|
||||
@@ -1,43 +0,0 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
/dist/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
# Data Extraction UI
|
||||
|
||||
This is a simple Vite + React template that builds on the @llamaindex/ui component library
|
||||
for displaying tables of extracted data.
|
||||
|
||||
Ideally run this with `llamactl` in the parent directory (See [README.md](../README.md)),
|
||||
but you can also run it standalone with `npm run dev`, though workflow integrations will not work.
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"$schema": "https://ui.shadcn.com/schema.json",
|
||||
"style": "new-york",
|
||||
"rsc": true,
|
||||
"tsx": true,
|
||||
"tailwind": {
|
||||
"config": "",
|
||||
"css": "src/index.css",
|
||||
"baseColor": "zinc",
|
||||
"cssVariables": true,
|
||||
"prefix": ""
|
||||
},
|
||||
"aliases": {
|
||||
"components": "@/components",
|
||||
"utils": "@/lib/utils",
|
||||
"ui": "@/components/ui",
|
||||
"lib": "@/lib",
|
||||
"hooks": "@/hooks"
|
||||
},
|
||||
"iconLibrary": "lucide"
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Test Proj</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,45 +0,0 @@
|
||||
{
|
||||
"name": "test-proj-ui",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc && vite build",
|
||||
"preview": "vite preview",
|
||||
"lint": "tsc --noEmit",
|
||||
"format": "prettier --write src",
|
||||
"format-check": "prettier --check src",
|
||||
"all-check": "pnpm i && pnpm run lint && pnpm run format-check && pnpm run build",
|
||||
"all-fix": "pnpm i && pnpm run lint && pnpm run format && pnpm run build"
|
||||
},
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.27.6",
|
||||
"@lezer/highlight": "^1.2.1",
|
||||
"@llamaindex/ui": "^2.1.2",
|
||||
"@radix-ui/themes": "^3.2.1",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"llama-cloud-services": "^0.3.4",
|
||||
"lucide-react": "^0.514.0",
|
||||
"react": "^18.3.0",
|
||||
"react-dom": "^18.3.0",
|
||||
"react-router-dom": "^6.30.0",
|
||||
"sonner": "^2.0.5",
|
||||
"tw-animate-css": "^1.3.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4.1.10",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"@vitejs/plugin-react": "^4.3.4",
|
||||
"postcss": "^8.5.5",
|
||||
"prettier": "^3.6.2",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"typescript": "^5",
|
||||
"vite": "^6.0.5"
|
||||
},
|
||||
"packageManager": "pnpm@10.11.1+sha512.e519b9f7639869dc8d5c3c5dfef73b3f091094b0a006d7317353c72b124e80e1afd429732e28705ad6bfa1ee879c1fce46c128ccebd3192101f43dd67c667912"
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
const config = {
|
||||
plugins: {
|
||||
"@tailwindcss/postcss": {},
|
||||
},
|
||||
};
|
||||
|
||||
export default config;
|
||||
@@ -1,71 +0,0 @@
|
||||
import React from "react";
|
||||
import { Routes, Route } from "react-router-dom";
|
||||
import { Theme } from "@radix-ui/themes";
|
||||
import {
|
||||
Breadcrumb,
|
||||
BreadcrumbItem,
|
||||
BreadcrumbList,
|
||||
BreadcrumbSeparator,
|
||||
ApiProvider,
|
||||
} from "@llamaindex/ui";
|
||||
import { Link } from "react-router-dom";
|
||||
import { Toaster } from "@llamaindex/ui";
|
||||
import { useToolbar, ToolbarProvider } from "@/lib/ToolbarContext";
|
||||
import { clients } from "@/lib/client";
|
||||
|
||||
// Import pages
|
||||
import HomePage from "./pages/HomePage";
|
||||
import ItemPage from "./pages/ItemPage";
|
||||
|
||||
/**
 * Application root: wraps the routed pages in the theme, API client and
 * toolbar providers, with the toolbar pinned above the scrollable content.
 */
export default function App() {
  const pages = (
    <Routes>
      <Route path="/" element={<HomePage />} />
      <Route path="/item/:itemId" element={<ItemPage />} />
    </Routes>
  );

  return (
    <Theme>
      <ApiProvider clients={clients}>
        <ToolbarProvider>
          <div className="grid grid-rows-[auto_1fr] h-screen">
            <Toolbar />
            <main className="overflow-auto">{pages}</main>
          </div>
          <Toaster />
        </ToolbarProvider>
      </ApiProvider>
    </Theme>
  );
}
|
||||
|
||||
/**
 * Sticky page header: renders the breadcrumb trail and the buttons currently
 * registered in the toolbar context.
 */
const Toolbar = () => {
  const toolbar = useToolbar();

  const trail = toolbar.breadcrumbs.map((crumb, position) => {
    let content: React.ReactNode;
    if (crumb.href && !crumb.isCurrentPage) {
      // Crumbs with a target that are not the current page are links.
      content = (
        <Link to={crumb.href} className="font-medium text-base">
          {crumb.label}
        </Link>
      );
    } else {
      content = (
        <span className={`font-medium ${position === 0 ? "text-base" : ""}`}>
          {crumb.label}
        </span>
      );
    }

    return (
      <React.Fragment key={position}>
        {position > 0 && <BreadcrumbSeparator />}
        <BreadcrumbItem>{content}</BreadcrumbItem>
      </React.Fragment>
    );
  });

  return (
    <header className="sticky top-0 z-50 flex h-16 shrink-0 items-center gap-2 border-b px-4 bg-white/95 backdrop-blur supports-[backdrop-filter]:bg-white/60">
      <Breadcrumb>
        <BreadcrumbList>{trail}</BreadcrumbList>
      </Breadcrumb>
      {toolbar.buttons}
    </header>
  );
};
|
||||
@@ -1,120 +0,0 @@
|
||||
@import "tailwindcss";
|
||||
@import "tw-animate-css";
|
||||
|
||||
@custom-variant dark (&:is(.dark *));
|
||||
|
||||
@theme inline {
|
||||
--radius-sm: calc(var(--radius) - 4px);
|
||||
--radius-md: calc(var(--radius) - 2px);
|
||||
--radius-lg: var(--radius);
|
||||
--radius-xl: calc(var(--radius) + 4px);
|
||||
--color-background: var(--background);
|
||||
--color-foreground: var(--foreground);
|
||||
--color-card: var(--card);
|
||||
--color-card-foreground: var(--card-foreground);
|
||||
--color-popover: var(--popover);
|
||||
--color-popover-foreground: var(--popover-foreground);
|
||||
--color-primary: var(--primary);
|
||||
--color-primary-foreground: var(--primary-foreground);
|
||||
--color-secondary: var(--secondary);
|
||||
--color-secondary-foreground: var(--secondary-foreground);
|
||||
--color-muted: var(--muted);
|
||||
--color-muted-foreground: var(--muted-foreground);
|
||||
--color-accent: var(--accent);
|
||||
--color-accent-foreground: var(--accent-foreground);
|
||||
--color-destructive: var(--destructive);
|
||||
--color-border: var(--border);
|
||||
--color-input: var(--input);
|
||||
--color-ring: var(--ring);
|
||||
--color-chart-1: var(--chart-1);
|
||||
--color-chart-2: var(--chart-2);
|
||||
--color-chart-3: var(--chart-3);
|
||||
--color-chart-4: var(--chart-4);
|
||||
--color-chart-5: var(--chart-5);
|
||||
--color-sidebar: var(--sidebar);
|
||||
--color-sidebar-foreground: var(--sidebar-foreground);
|
||||
--color-sidebar-primary: var(--sidebar-primary);
|
||||
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
|
||||
--color-sidebar-accent: var(--sidebar-accent);
|
||||
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
|
||||
--color-sidebar-border: var(--sidebar-border);
|
||||
--color-sidebar-ring: var(--sidebar-ring);
|
||||
}
|
||||
|
||||
:root {
|
||||
--radius: 0.625rem;
|
||||
--card: oklch(1 0 0);
|
||||
--card-foreground: oklch(0.141 0.005 285.823);
|
||||
--popover: oklch(1 0 0);
|
||||
--popover-foreground: oklch(0.141 0.005 285.823);
|
||||
--primary: oklch(0.21 0.006 285.885);
|
||||
--primary-foreground: oklch(0.985 0 0);
|
||||
--secondary: oklch(0.967 0.001 286.375);
|
||||
--secondary-foreground: oklch(0.21 0.006 285.885);
|
||||
--muted: oklch(0.967 0.001 286.375);
|
||||
--muted-foreground: oklch(0.552 0.016 285.938);
|
||||
--accent: oklch(0.967 0.001 286.375);
|
||||
--accent-foreground: oklch(0.21 0.006 285.885);
|
||||
--destructive: oklch(0.577 0.245 27.325);
|
||||
--border: oklch(0.92 0.004 286.32);
|
||||
--input: oklch(0.92 0.004 286.32);
|
||||
--ring: oklch(0.705 0.015 286.067);
|
||||
--chart-1: oklch(0.646 0.222 41.116);
|
||||
--chart-2: oklch(0.6 0.118 184.704);
|
||||
--chart-3: oklch(0.398 0.07 227.392);
|
||||
--chart-4: oklch(0.828 0.189 84.429);
|
||||
--chart-5: oklch(0.769 0.188 70.08);
|
||||
--sidebar: oklch(0.985 0 0);
|
||||
--sidebar-foreground: oklch(0.141 0.005 285.823);
|
||||
--sidebar-primary: oklch(0.21 0.006 285.885);
|
||||
--sidebar-primary-foreground: oklch(0.985 0 0);
|
||||
--sidebar-accent: oklch(0.967 0.001 286.375);
|
||||
--sidebar-accent-foreground: oklch(0.21 0.006 285.885);
|
||||
--sidebar-border: oklch(0.92 0.004 286.32);
|
||||
--sidebar-ring: oklch(0.705 0.015 286.067);
|
||||
--background: oklch(1 0 0);
|
||||
--foreground: oklch(0.141 0.005 285.823);
|
||||
}
|
||||
|
||||
.dark {
|
||||
--background: oklch(0.141 0.005 285.823);
|
||||
--foreground: oklch(0.985 0 0);
|
||||
--card: oklch(0.21 0.006 285.885);
|
||||
--card-foreground: oklch(0.985 0 0);
|
||||
--popover: oklch(0.21 0.006 285.885);
|
||||
--popover-foreground: oklch(0.985 0 0);
|
||||
--primary: oklch(0.92 0.004 286.32);
|
||||
--primary-foreground: oklch(0.21 0.006 285.885);
|
||||
--secondary: oklch(0.274 0.006 286.033);
|
||||
--secondary-foreground: oklch(0.985 0 0);
|
||||
--muted: oklch(0.274 0.006 286.033);
|
||||
--muted-foreground: oklch(0.705 0.015 286.067);
|
||||
--accent: oklch(0.274 0.006 286.033);
|
||||
--accent-foreground: oklch(0.985 0 0);
|
||||
--destructive: oklch(0.704 0.191 22.216);
|
||||
--border: oklch(1 0 0 / 10%);
|
||||
--input: oklch(1 0 0 / 15%);
|
||||
--ring: oklch(0.552 0.016 285.938);
|
||||
--chart-1: oklch(0.488 0.243 264.376);
|
||||
--chart-2: oklch(0.696 0.17 162.48);
|
||||
--chart-3: oklch(0.769 0.188 70.08);
|
||||
--chart-4: oklch(0.627 0.265 303.9);
|
||||
--chart-5: oklch(0.645 0.246 16.439);
|
||||
--sidebar: oklch(0.21 0.006 285.885);
|
||||
--sidebar-foreground: oklch(0.985 0 0);
|
||||
--sidebar-primary: oklch(0.488 0.243 264.376);
|
||||
--sidebar-primary-foreground: oklch(0.985 0 0);
|
||||
--sidebar-accent: oklch(0.274 0.006 286.033);
|
||||
--sidebar-accent-foreground: oklch(0.985 0 0);
|
||||
--sidebar-border: oklch(1 0 0 / 10%);
|
||||
--sidebar-ring: oklch(0.552 0.016 285.938);
|
||||
}
|
||||
|
||||
@layer base {
|
||||
* {
|
||||
@apply border-border outline-ring/50;
|
||||
}
|
||||
body {
|
||||
@apply bg-background text-foreground;
|
||||
}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
import React from "react";
|
||||
import { APP_TITLE } from "./config";
|
||||
|
||||
/**
 * One entry of the breadcrumb trail held in the toolbar context.
 */
export interface BreadcrumbItem {
|
||||
/** Text for this entry (e.g. the app title or a file name). */
label: string;
|
||||
/** Optional link target for this entry. */
href?: string;
|
||||
/** Optional flag set on the entry that represents the page being viewed. */
isCurrentPage?: boolean;
|
||||
}
|
||||
|
||||
/** Shape of the value shared through the toolbar context. */
type ToolbarContextValue = {
  buttons: React.ReactNode[];
  setButtons: (fn: (prev: React.ReactNode[]) => React.ReactNode[]) => void;
  breadcrumbs: BreadcrumbItem[];
  setBreadcrumbs: (items: BreadcrumbItem[]) => void;
};

/**
 * Context carrying the toolbar buttons and breadcrumb trail together with
 * their setters. The default value (used when no provider is mounted) holds
 * empty lists and no-op setters.
 */
export const ToolbarCtx = React.createContext<ToolbarContextValue>({
  buttons: [],
  setButtons: () => {},
  breadcrumbs: [],
  setBreadcrumbs: () => {},
});
|
||||
|
||||
/**
 * Provides toolbar state (buttons and breadcrumbs) to the component tree.
 *
 * The breadcrumb trail starts with a single entry pointing at the app root,
 * labelled with APP_TITLE; the button list starts empty.
 *
 * @param children - Subtree that can read the toolbar state via ToolbarCtx.
 */
export const ToolbarProvider = ({
  children,
}: {
  children: React.ReactNode;
}) => {
  const [buttons, setButtons] = React.useState<React.ReactNode[]>([]);
  const [breadcrumbs, setBreadcrumbs] = React.useState<BreadcrumbItem[]>([
    { label: APP_TITLE, href: "/" },
  ]);

  // Memoise the context value so consumers only re-render when the buttons or
  // breadcrumbs actually change, not on every render of the provider. The
  // useState setters have a stable identity and need no dependency entry.
  const contextValue = React.useMemo(
    () => ({ buttons, setButtons, breadcrumbs, setBreadcrumbs }),
    [buttons, breadcrumbs],
  );

  return (
    <ToolbarCtx.Provider value={contextValue}>{children}</ToolbarCtx.Provider>
  );
};
|
||||
|
||||
/** Hook returning the current ToolbarCtx value (buttons, breadcrumbs and their setters). */
export const useToolbar = () => React.useContext(ToolbarCtx);
|
||||
@@ -1,45 +0,0 @@
|
||||
import { MySchema } from "@/schemas/MySchema";
|
||||
import { ExtractedData } from "llama-cloud-services/beta/agent";
|
||||
import {
|
||||
ApiClients,
|
||||
createWorkflowsClient,
|
||||
createWorkflowsConfig,
|
||||
} from "@llamaindex/ui";
|
||||
import { createCloudAgentClient, cloudApiClient } from "@llamaindex/ui";
|
||||
import { AGENT_NAME, EXTRACTED_DATA_COLLECTION } from "./config";
|
||||
|
||||
// Settings read from the Vite environment; each one may be unset.
const platformToken = import.meta.env.VITE_LLAMA_CLOUD_API_KEY;
const apiBaseUrl = import.meta.env.VITE_LLAMA_CLOUD_BASE_URL;
const projectId = import.meta.env.VITE_LLAMA_DEPLOY_PROJECT_ID;

// Headers sent with every platform request. Each entry is only added when the
// corresponding setting is present.
const platformHeaders = {
  // Optional bearer token, e.g. a backend API token scoped to a project.
  ...(platformToken && { authorization: `Bearer ${platformToken}` }),
  // This header is required for requests to correctly scope to the agent's
  // project when authenticating with a user cookie.
  ...(projectId && { "Project-Id": projectId }),
};

// Configure the platform client
cloudApiClient.setConfig({
  ...(apiBaseUrl && { baseUrl: apiBaseUrl }),
  headers: platformHeaders,
});

// Agent-data client typed to the extracted MySchema records, bound to the
// configured collection.
const agentClient = createCloudAgentClient<ExtractedData<MySchema>>({
  client: cloudApiClient,
  windowUrl: typeof window !== "undefined" ? window.location.href : undefined,
  collection: EXTRACTED_DATA_COLLECTION,
});

// Workflows client rooted at this deployment's path.
const workflowsConfig = createWorkflowsConfig({
  baseUrl: `/deployments/${AGENT_NAME}/`,
});
const workflowsClient = createWorkflowsClient(workflowsConfig);

// Bundle of all API clients in the shape expected by ApiClients.
const clients: ApiClients = {
  workflowsClient,
  cloudApiClient,
  agentDataClient: agentClient,
};

export { clients, agentClient };
|
||||
@@ -1,3 +0,0 @@
|
||||
// Application title; used as the label of the root breadcrumb entry.
export const APP_TITLE = "Test Proj";
|
||||
// Collection name passed to the agent data client.
export const EXTRACTED_DATA_COLLECTION = "test-proj";
|
||||
// Deployment name, taken from the Vite environment; used to build the
// `/deployments/<name>/` base URL of the workflows client.
export const AGENT_NAME = import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME;
|
||||
@@ -1,6 +0,0 @@
|
||||
import { clsx, type ClassValue } from "clsx";
|
||||
import { twMerge } from "tailwind-merge";
|
||||
|
||||
/**
 * Builds a single class string from the given class values.
 *
 * The inputs are first combined with `clsx`, then the resulting string is
 * passed through `twMerge`.
 *
 * @param inputs - Class values accepted by `clsx`.
 * @returns The merged class string.
 */
export function cn(...inputs: ClassValue[]) {
  const combined = clsx(inputs);
  return twMerge(combined);
}
|
||||
@@ -1,14 +0,0 @@
|
||||
import { StrictMode } from "react";
|
||||
import { createRoot } from "react-dom/client";
|
||||
import { HashRouter } from "react-router-dom";
|
||||
import App from "./App";
|
||||
import "@llamaindex/ui/styles.css";
|
||||
import "./index.css";
|
||||
|
||||
// Mount the application into the #root element. Fail with a descriptive error
// instead of relying on a non-null assertion when the element is missing.
const rootElement = document.getElementById("root");
if (!rootElement) {
  throw new Error('Root element "#root" not found; cannot mount the application.');
}

createRoot(rootElement).render(
  <StrictMode>
    <HashRouter>
      <App />
    </HashRouter>
  </StrictMode>,
);
|
||||
@@ -1,23 +0,0 @@
|
||||
.main {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.grid {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 1rem;
|
||||
margin-bottom: 1rem;
|
||||
& > * {
|
||||
flex: 1;
|
||||
}
|
||||
}
|
||||
|
||||
.commandBar {
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.progressBar {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
@@ -1,88 +0,0 @@
|
||||
import {
|
||||
ItemCount,
|
||||
WorkflowTrigger,
|
||||
WorkflowProgressBar,
|
||||
ExtractedDataItemGrid,
|
||||
useWorkflowHandlerList,
|
||||
} from "@llamaindex/ui";
|
||||
import type { TypedAgentData } from "llama-cloud-services/beta/agent";
|
||||
import styles from "./HomePage.module.css";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
/**
 * Home page: renders the task list, remounting it (via a changing `key`)
 * whenever a running "process-file" task finishes.
 */
export default function HomePage() {
  const { taskKey } = useTaskCompletedState();
  return <TaskList key={taskKey} />;
}

/**
 * Returns a key that increments when a task is completed, can be used to force
 * a re-render of the task list.
 *
 * Named with the `use` prefix because it calls React hooks and must therefore
 * follow the Rules of Hooks.
 *
 * @returns The last observed number of running tasks and the current key.
 */
function useTaskCompletedState() {
  const { handlers } = useWorkflowHandlerList("process-file");
  const currentRunningCount = handlers.filter(
    (handler) => handler.status === "running",
  ).length;
  const [runningTaskCount, setRunningTaskCount] = useState(currentRunningCount);
  const [taskKey, setTaskKey] = useState(0);

  useEffect(() => {
    if (currentRunningCount < runningTaskCount) {
      // forcefully reload task list after a task is completed
      setTaskKey((previousKey) => previousKey + 1);
    }
    // Once the stored count matches the current one, the follow-up run of this
    // effect is a no-op, so listing both dependencies cannot double-increment.
    setRunningTaskCount(currentRunningCount);
  }, [currentRunningCount, runningTaskCount]);

  return { runningTaskCount, taskKey };
}
|
||||
|
||||
/**
 * Dashboard body: item counters, the file-upload trigger for the
 * "process-file" workflow, its progress bar, and the grid of extracted items.
 * Clicking a grid row navigates to that item's page.
 */
function TaskList() {
  const navigate = useNavigate();
  const goToItem = (item: TypedAgentData) => {
    navigate(`/item/${item.id}`);
  };

  return (
    // The wrapper previously referenced `styles.page`, a class the CSS module
    // does not define (it resolved to `undefined`), so no class is applied.
    <div>
      <main className={styles.main}>
        <div className={styles.grid}>
          <ItemCount title="Total Items" />
          <ItemCount
            title="Reviewed"
            filter={{
              status: { eq: "approved" },
            }}
          />
          <ItemCount
            title="Needs Review"
            filter={{
              status: { eq: "pending_review" },
            }}
          />
        </div>
        <div className={styles.commandBar}>
          <WorkflowTrigger
            workflowName="process-file"
            customWorkflowInput={(files) => {
              // Only the first uploaded file is forwarded to the workflow.
              return {
                file_id: files[0].fileId,
              };
            }}
          />
        </div>
        <WorkflowProgressBar
          className={styles.progressBar}
          workflowName="process-file"
        />
        <ExtractedDataItemGrid
          onRowClick={goToItem}
          builtInColumns={{
            fileName: true,
            status: true,
            createdAt: true,
            itemsToReview: true,
            actions: true,
          }}
        />
      </main>
    </div>
  );
}
|
||||
@@ -1,138 +0,0 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import {
|
||||
AcceptReject,
|
||||
ExtractedDataDisplay,
|
||||
FilePreview,
|
||||
useItemData,
|
||||
type Highlight,
|
||||
} from "@llamaindex/ui";
|
||||
import { Clock, XCircle } from "lucide-react";
|
||||
import { useParams } from "react-router-dom";
|
||||
import type { MySchema } from "../schemas/MySchema";
|
||||
import MyJsonSchema from "../schemas/MySchema.json" with { type: "json" };
|
||||
import { useToolbar } from "@/lib/ToolbarContext";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import { modifyJsonSchema } from "@llamaindex/ui/lib";
|
||||
import { APP_TITLE } from "@/lib/config";
|
||||
|
||||
/**
 * Review page for a single extracted item.
 *
 * Loads the item identified by the `itemId` route parameter, shows the source
 * file preview on the left and the editable extracted data on the right, and
 * registers accept/reject controls plus a file-name breadcrumb in the toolbar
 * while the page is mounted.
 */
export default function ItemPage() {
  const { itemId } = useParams<{ itemId: string }>();
  const { setButtons, setBreadcrumbs } = useToolbar();
  const [highlight, setHighlight] = useState<Highlight | undefined>(undefined);

  // Use the hook to fetch item data
  const itemHookData = useItemData<MySchema>({
    // order/remove fields as needed here
    jsonSchema: modifyJsonSchema(MyJsonSchema as any, {}),
    itemId: itemId as string,
    isMock: false,
  });

  const navigate = useNavigate();

  // Update breadcrumb when item data loads
  useEffect(() => {
    const fileName = itemHookData.item?.data?.file_name;
    if (fileName) {
      setBreadcrumbs([
        { label: APP_TITLE, href: "/" },
        {
          label: fileName,
          isCurrentPage: true,
        },
      ]);
    }

    return () => {
      // Reset to default breadcrumb when leaving the page
      setBreadcrumbs([{ label: APP_TITLE, href: "/" }]);
    };
  }, [itemHookData.item?.data?.file_name, setBreadcrumbs]);

  // Register the accept/reject controls in the toolbar; clear them on unmount.
  useEffect(() => {
    setButtons(() => [
      // The element lives in an array of nodes, so it carries a stable key to
      // avoid React's missing-key warning if the array is rendered as a list.
      <div key="accept-reject" className="ml-auto flex items-center">
        <AcceptReject<MySchema>
          itemData={itemHookData}
          onComplete={() => navigate("/")}
        />
      </div>,
    ]);
    return () => {
      setButtons(() => []);
    };
    // NOTE(review): the dependency is `itemHookData.data`, while the rest of
    // this component reads `item`, `loading`, `error`, `updateData` and
    // `jsonSchema` from the hook result — confirm `data` is the intended field.
  }, [itemHookData.data, setButtons]);

  const {
    item: itemData,
    updateData,
    loading: isLoading,
    error,
  } = itemHookData;

  if (isLoading) {
    return (
      <div className="flex h-screen items-center justify-center">
        <div className="text-center">
          <Clock className="h-8 w-8 animate-spin mx-auto mb-2" />
          <div className="text-sm text-gray-500">Loading item...</div>
        </div>
      </div>
    );
  }

  if (error || !itemData) {
    return (
      <div className="flex h-screen items-center justify-center">
        <div className="text-center">
          <XCircle className="h-8 w-8 text-red-500 mx-auto mb-2" />
          <div className="text-sm text-gray-500">
            Error loading item: {error || "Item not found"}
          </div>
        </div>
      </div>
    );
  }

  return (
    <div className="flex h-full bg-gray-50">
      {/* Left Side - File Preview */}
      <div className="w-1/2 border-r border-gray-200 bg-white">
        {itemData.data.file_id && (
          <FilePreview
            fileId={itemData.data.file_id}
            onBoundingBoxClick={(box, pageNumber) => {
              console.log("Bounding box clicked:", box, "on page:", pageNumber);
            }}
            highlight={highlight}
          />
        )}
      </div>

      {/* Right Side - Review Panel */}
      <div className="flex-1 bg-white h-full overflow-y-auto">
        <div className="p-4 space-y-4">
          {/* Extracted Data */}
          <ExtractedDataDisplay<MySchema>
            extractedData={itemData.data}
            title="Extracted Data"
            onChange={(updatedData) => {
              updateData(updatedData);
            }}
            onClickField={(args) => {
              // TODO: set multiple highlights
              // Only the page of the first citation is used; the box
              // coordinates below are fixed placeholder values.
              setHighlight({
                page: args.metadata?.citation?.[0]?.page ?? 1,
                x: 100,
                y: 100,
                width: 0,
                height: 0,
              });
            }}
            jsonSchema={itemHookData.jsonSchema}
          />
        </div>
      </div>
    </div>
  );
}
|
||||
-12
@@ -1,12 +0,0 @@
|
||||
/* eslint-disable */
|
||||
/**
|
||||
* This file was automatically generated by json-schema-to-typescript.
|
||||
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
||||
* and run json-schema-to-typescript to regenerate this file.
|
||||
*/
|
||||
|
||||
export type Hello = string;
|
||||
|
||||
export interface MySchema {
|
||||
hello: Hello;
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"properties": {
|
||||
"hello": {
|
||||
"title": "Hello",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["hello"],
|
||||
"title": "MySchema",
|
||||
"type": "object"
|
||||
}
|
||||
Vendored
-15
@@ -1,15 +0,0 @@
|
||||
/// <reference types="vite/client" />
|
||||
|
||||
/**
 * Typed view of the `import.meta.env` values this UI reads.
 */
interface ImportMetaEnv {
  readonly VITE_LLAMA_CLOUD_API_KEY?: string;
  // Only defined by the Vite config when LLAMA_CLOUD_BASE_URL is set.
  readonly VITE_LLAMA_CLOUD_BASE_URL?: string;

  // injected from llama_deploy
  readonly VITE_LLAMA_DEPLOY_BASE_PATH: string;
  // Key actually emitted by the Vite config's `define` block for the
  // deployment base path; may be absent when the base path is not set.
  readonly VITE_LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH?: string;
  readonly VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME: string;
  // Only defined by the Vite config when LLAMA_DEPLOY_PROJECT_ID is set, so
  // readers must handle `undefined`.
  readonly VITE_LLAMA_DEPLOY_PROJECT_ID?: string;
}
|
||||
|
||||
/** Declares `import.meta.env` with the typed ImportMetaEnv shape. */
interface ImportMeta {
|
||||
readonly env: ImportMetaEnv;
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"useDefineForClassFields": true,
|
||||
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
||||
"module": "ESNext",
|
||||
"skipLibCheck": true,
|
||||
|
||||
/* Bundler mode */
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx",
|
||||
|
||||
/* Linting */
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
|
||||
/* Path mapping */
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src", "vite.config.ts", "src/vite-env.d.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
import { defineConfig } from "vite";
|
||||
import react from "@vitejs/plugin-react";
|
||||
import path from "path";
|
||||
|
||||
// https://vitejs.dev/config/
|
||||
/**
 * Vite configuration for the UI.
 *
 * Reads deployment settings from the process environment, exposes them to
 * client code through `define` as `import.meta.env.VITE_*` constants, and
 * serves on PORT (default 3000).
 */
export default defineConfig(() => {
  const deploymentName = process.env.LLAMA_DEPLOY_DEPLOYMENT_NAME;
  const basePath = process.env.LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH;
  const projectId = process.env.LLAMA_DEPLOY_PROJECT_ID;
  const port = process.env.PORT ? Number(process.env.PORT) : 3000;
  const baseUrl = process.env.LLAMA_CLOUD_BASE_URL;

  return {
    plugins: [react()],
    resolve: {
      alias: {
        // Lets source files import from "@/..." instead of relative paths.
        "@": path.resolve(__dirname, "./src"),
      },
    },
    server: {
      port,
      host: true,
    },
    build: {
      outDir: "dist",
      sourcemap: true,
    },
    base: basePath,
    define: {
      // Primary define uses NAME
      "import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME": JSON.stringify(
        deploymentName
      ),
      "import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH": JSON.stringify(basePath),
      // The two entries below are only emitted when their variable is set.
      ...(projectId && {
        "import.meta.env.VITE_LLAMA_DEPLOY_PROJECT_ID":
          JSON.stringify(projectId),
      }),
      ...(baseUrl && {
        "import.meta.env.VITE_LLAMA_CLOUD_BASE_URL": JSON.stringify(baseUrl),
      }),
    },
  };
});
|
||||
Reference in New Issue
Block a user