Update all of the templates to remove test-proj, and migrate from vibe-llama templates

This commit is contained in:
Adrian Lyjak
2025-09-27 13:13:31 -04:00
parent 707f6cfd39
commit 5e48554603
39 changed files with 7 additions and 2314 deletions
-69
View File
@@ -1,69 +0,0 @@
name: Check Template Regeneration
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
check-template:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Run regeneration check
run: uv run copier/copy_utils.py check-regeneration
check-python:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Run Python checks
run: uv run hatch run all-check
working-directory: test-proj
check-ui:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '24'
- name: Enable Corepack
run: corepack enable
- name: Activate pnpm version
working-directory: test-proj/ui
run: corepack prepare --activate
- name: Run UI checks
run: pnpm run all-check
working-directory: test-proj/ui
+6 -10
View File
@@ -1,11 +1,7 @@
test-proj/uv.lock
.venv
uv.lock
expected-proj
test-proj/.venv
test-proj/ui/node_modules
test-proj/ui/pnpm-lock.yaml
ui/pnpm-lock.yaml
ui/package-lock.json
test-proj/ui/package-lock.json
.env
__pycache__
workflows.db
.venv
package-lock.json
node_modules
-3
View File
@@ -1,3 +0,0 @@
.env
__pycache__
workflows.db
-87
View File
@@ -1,87 +0,0 @@
# Contributor Workflow
This project uses a "materialized" approach for template development. You make changes to a generated project (`test-proj/`) and then use a script to copy those changes back into the template source. This allows for a more interactive development experience.
The core script for this workflow is `copier/copy_utils.py`.
## Development Steps
### 1. Initial Setup
Ensure your repository is in a clean state (no uncommitted changes). Regenerate the `test-proj` directory to ensure it's synchronized with the latest version of the template.
```bash
# Regenerate test-proj from the template
uv run --script copier/copy_utils.py regenerate
```
### 2. Develop and Test in `test-proj`
Make all your desired code changes directly within the `test-proj/` directory. Treat it as a standard project: run development servers, add dependencies, and test your changes live.
```bash
# Example: Work on the UI
cd test-proj/ui
npm install
npm run dev
```
Run validation checks to ensure your changes are correct before propagating them.
```bash
# From the project root directory
./copier/copy_utils.py check-python
# or within test-proj/, run `uv run hatch run all` or the individual script commands, such as `uv run hatch run format`
./copier/copy_utils.py check-javascript
# or within test-proj/ui/, run `npm run all`, or the individual script commands such as `npm run format`
```
### 3. Commit Your Development Work
Once you are satisfied with your changes in `test-proj`, it's a good idea to commit them so that you can revert if something goes wrong.
```bash
git add .
git commit -m "WIP: Implement new feature in test-proj"
```
### 4. Propagate Changes to the Template
Use the `check-template --fix` command to automatically copy your changes from `test-proj` back into the template source files. It compares `test-proj` against what the current template would generate, showing only meaningful differences.
```bash
# Check what would change (recommended first step)
./copier/copy_utils.py check-template
# Apply changes automatically
./copier/copy_utils.py check-template --fix
# or run the formatters first, then fix everything from the materialized project
./copier/copy_utils.py check-template --fix-format
```
`check-template --fix` provides:
- **Automatic Jinja Resolution**: Resolves simple template variable changes (project names, versions, etc.)
- **Gitignore Respect**: Only considers files that would be tracked by git, ignoring build artifacts
- **Selective Copying**: Copies non-templated files and auto-resolved template files back to the template
### 5. Handle Remaining Manual Updates
For complex `.jinja` files that can't be auto-resolved, you'll need manual intervention:
1. Open the modified file in `test-proj` (e.g., `test-proj/pyproject.toml`)
2. Open the corresponding template file (e.g., `pyproject.toml.jinja`)
3. Carefully apply the changes, ensuring you retain or add the necessary Jinja templating logic
The tool will indicate which files need manual resolution.
### 6. Verify Template Integrity
After propagating all changes, verify that the template is consistent by running the `check-regeneration` command. This command regenerates `test-proj` and checks for any differences.
```bash
uv run --script copier/copy_utils.py check-regeneration
```
If this command reports any differences, it indicates that a change was not correctly propagated to the template. You will need to identify the discrepancy, fix the template file(s), and run the check again. A successful run will report no differences.
-5
View File
@@ -1,5 +0,0 @@
# Getting Started
This is a copier template. Run it with `uvx copier copy gh:run-llama/template-workflow-data-extraction <name>`, follow the web prompts to log in, and proceed. After completion, you should have a directory created under `<name>`, and you can open it and follow its `README.md` to get running.
You can later "merge" updates into the project, if this template is updated, by running `uvx copier update -A` within the project.
+1 -9
View File
@@ -17,12 +17,4 @@ project_title:
project_name_snake:
type: str
default: "{{ project_name.replace('-', '_') }}"
when: false
_exclude:
- "test-proj"
- ".git"
- ".github"
- "copier"
- "CONTRIBUTING.md"
- "copier.yaml"
when: false
-867
View File
@@ -1,867 +0,0 @@
#!/usr/bin/env -S uv run --script
# /// script
# dependencies=[
# "copier",
# "click",
# "pyyaml",
# "rich",
# ]
# ///
import warnings
# Suppress deprecation warnings from copier
warnings.filterwarnings("ignore", category=DeprecationWarning)
import os
import difflib
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Dict, List, Optional
import click
import yaml
from rich.console import Console
import copier
from copier._template import Template
from copier.errors import DirtyLocalWarning
warnings.filterwarnings("ignore", category=DirtyLocalWarning)
console = Console()
def run_copier_quietly(src_path: str, dst_path: str, data: Dict[str, str]) -> None:
    """Render the copier template into ``dst_path`` with output suppressed.

    Args:
        src_path: Location of the copier template.
        dst_path: Directory the project is generated into.
        data: Answer values forwarded as copier's ``data`` argument.
    """
    # Fixed options: quiet output, the HEAD ref of the template, and
    # ``unsafe=True`` as required by this repository's workflow.
    copy_options = {
        "src_path": src_path,
        "dst_path": dst_path,
        "data": data,
        "unsafe": True,
        "quiet": True,
        "vcs_ref": "HEAD",
    }
    copier.run_copy(**copy_options)
def render_jinja_string(
    template_string: str, variables: Dict[str, str], script_dir: Path
) -> str:
    """Render ``template_string`` with the Jinja settings of the template at ``script_dir``.

    Args:
        template_string: Jinja source to render.
        variables: Values exposed to the template as keyword variables.
        script_dir: Root directory of the copier template whose Jinja
            extensions and environment options are reused.

    Returns:
        The rendered string.
    """
    import jinja2

    copier_template = Template(url=str(script_dir))
    # Build a loader-less environment that mirrors the template's own
    # extensions and environment options.
    environment = jinja2.Environment(
        loader=jinja2.BaseLoader(),
        extensions=copier_template.jinja_extensions,
        **copier_template.envops,
    )
    compiled = environment.from_string(template_string)
    return compiled.render(**variables)
def parse_template_variables() -> Dict[str, str]:
    """Build the variable context for rendering the template.

    Starts from the answers stored in ``test-proj/.copier-answers.yml``
    (keys beginning with ``_`` are dropped), then fills in every template
    question that has a ``default`` and no recorded answer.  Defaults that
    contain ``{{`` are rendered as Jinja against the values gathered so far;
    several passes are made so defaults can depend on other defaults.

    Returns:
        Mapping of variable name to value.

    Raises:
        OSError: If the answers file cannot be opened.
    """
    script_dir = Path(__file__).parent.parent

    # Read answers from the existing materialized project.  The encoding is
    # explicit so the result does not depend on the platform locale.
    answers_file = script_dir / "test-proj" / ".copier-answers.yml"
    with open(answers_file, "r", encoding="utf-8") as f:
        # An empty YAML document loads as None; treat it as "no answers".
        answers_data = yaml.safe_load(f) or {}

    # Filter out copier metadata (keys such as _commit and _src_path).
    result = {k: v for k, v in answers_data.items() if not k.startswith("_")}

    # Template configuration supplies the questions and their defaults.
    template = Template(url=str(script_dir))

    # Multiple passes handle dependencies between computed variables.
    max_iterations = 10
    for _ in range(max_iterations):
        changed = False
        for question_name, question_config in template.questions_data.items():
            if question_name in result or "default" not in question_config:
                continue

            default_value = question_config["default"]
            if isinstance(default_value, str) and "{{" in default_value:
                try:
                    result[question_name] = render_jinja_string(
                        default_value, result, script_dir
                    )
                except Exception:
                    # Deliberate best-effort: the expression may reference a
                    # variable that is only computed in a later pass.
                    continue
            else:
                result[question_name] = default_value
            changed = True

        # Stop once a full pass computes nothing new.
        if not changed:
            break

    return result
## Removed simple line-based resolver in favor of chunk-based approach
def _line_has_jinja_markers(line: str) -> bool:
"""Return True if the line appears to contain Jinja syntax."""
return ("{{" in line) or ("{%" in line) or ("{#" in line)
def _build_expected_to_template_index_map(
template_lines: List[str], expected_lines: List[str]
) -> Dict[int, int]:
"""Build a best-effort map from expected line index to template line index.
Uses difflib to align the current template with its rendered expected output.
The map records, for each expected index, a nearby template index anchor.
"""
matcher = difflib.SequenceMatcher(
None, template_lines, expected_lines, autojunk=False
)
mapping: Dict[int, int] = {}
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag == "equal":
# Direct 1:1 alignment for equal blocks
span = min(i2 - i1, j2 - j1)
for k in range(span):
mapping[j1 + k] = i1 + k
else:
# For changed regions, map expected indices to the closest template index boundary
# Use i1 as the anchor template position for the whole expected block [j1, j2)
for j in range(j1, j2):
mapping.setdefault(j, i1)
# Also provide a fallback mapping for indexes beyond the last aligned block
if expected_lines:
last_expected_index = len(expected_lines)
last_template_index = len(template_lines)
mapping.setdefault(last_expected_index, last_template_index)
return mapping
def attempt_chunk_based_jinja_resolution(
    template_file: Path, expected_content: str, actual_content: str
) -> Optional[str]:
    """Try to port edits made to a rendered file back into its Jinja template.

    Steps:
    - diff the expected (freshly rendered) content against the actual
      (materialized) content with ``difflib``;
    - translate each hunk's expected line indexes into template line indexes
      using a separate template<->expected alignment;
    - apply the insert/delete/replace to a copy of the template lines, skipping
      any hunk whose target region (or, for inserts, immediate neighbourhood)
      contains Jinja markers;
    - validate the proposal by regenerating and comparing.

    Args:
        template_file: Path of the ``.jinja`` template on disk.
        expected_content: Output the current template renders to.
        actual_content: Edited content found in the materialized project.

    Returns:
        The proposed template content (with a trailing newline), or ``None``
        when the template does not exist, no hunk could be applied, or
        validation fails.
    """
    if not template_file.exists():
        return None

    with open(template_file, "r", encoding="utf-8") as f:
        template_lines = f.read().splitlines()
    expected_lines = expected_content.splitlines()
    actual_lines = actual_content.splitlines()

    # Map expected indexes to template indexes using the template/expected alignment.
    exp_to_tpl = _build_expected_to_template_index_map(template_lines, expected_lines)

    # Hunks between expected and actual content.
    matcher = difflib.SequenceMatcher(
        None, expected_lines, actual_lines, autojunk=False
    )

    # Work on a mutable copy of the template lines.
    new_template_lines = list(template_lines)
    delta_offset = 0  # net shift caused by insertions/deletions applied so far

    def tpl_index_from_expected(exp_index: int) -> int:
        # Indexes in exp_to_tpl refer to the ORIGINAL template, so the fallback
        # must be the original length as well; delta_offset is then applied
        # exactly once.  (Falling back to len(new_template_lines) would count
        # the shift twice.)
        return exp_to_tpl.get(exp_index, len(template_lines)) + delta_offset

    def region_has_jinja(t_start: int, t_end: int) -> bool:
        # Clamp to the current list bounds, then look for any Jinja marker.
        lo = max(0, t_start)
        hi = min(len(new_template_lines), t_end)
        return any(
            _line_has_jinja_markers(new_template_lines[t]) for t in range(lo, hi)
        )

    changes_made = False

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue

        tpl_start = tpl_index_from_expected(i1)
        tpl_end = tpl_index_from_expected(i2)

        if tag == "insert":
            # Only insert when the line before and the line at the insertion
            # point are both free of Jinja markers.
            if region_has_jinja(tpl_start - 1, tpl_start + 1):
                continue
            insert_lines = actual_lines[j1:j2]
            new_template_lines[tpl_start:tpl_start] = insert_lines
            delta_offset += len(insert_lines)
            changes_made = True
            continue

        # "replace" and "delete" both rewrite existing template lines; leave
        # the hunk alone if any of those lines carries Jinja syntax.
        if region_has_jinja(tpl_start, tpl_end):
            continue

        del_count = max(0, tpl_end - tpl_start)
        if tag == "delete":
            if del_count > 0:
                del new_template_lines[tpl_start:tpl_end]
                delta_offset -= del_count
                changes_made = True
        elif tag == "replace":
            replacement_lines = actual_lines[j1:j2]
            new_template_lines[tpl_start:tpl_end] = replacement_lines
            delta_offset += len(replacement_lines) - del_count
            changes_made = True

    if not changes_made:
        return None

    proposed_content = "\n".join(new_template_lines)
    # Ensure a single trailing newline for stability across environments.
    if not proposed_content.endswith("\n"):
        proposed_content += "\n"

    # Accept the proposal only if rendering it reproduces the actual content.
    script_dir = Path(__file__).parent.parent
    if validate_auto_resolved_template(
        script_dir, template_file, proposed_content, actual_content
    ):
        return proposed_content

    return None
def validate_auto_resolved_template(
    script_dir: Path,
    template_file: Path,
    resolved_content: str,
    expected_materialized_content: str,
) -> bool:
    """Check that ``resolved_content`` renders to the expected materialized file.

    The candidate content is written to ``template_file`` temporarily, the
    template is rendered into a scratch directory, and the rendered file is
    compared (ignoring leading/trailing whitespace) with
    ``expected_materialized_content``.  The template file is always put back
    in its prior state: its original content is restored, or the file is
    removed if it did not exist before the call.

    Args:
        script_dir: Root directory of the copier template.
        template_file: Template file to test; must live under ``script_dir``.
        resolved_content: Candidate template content.
        expected_materialized_content: Content the rendered file should have.

    Returns:
        True if the rendered output matches, False on mismatch or on any error
        raised while writing, rendering or reading.
    """
    # Remember the current template content (None means "did not exist").
    original_content: Optional[str] = None
    if template_file.exists():
        with open(template_file, "r", encoding="utf-8") as f:
            original_content = f.read()

    try:
        # Write the candidate content temporarily.
        template_file.parent.mkdir(parents=True, exist_ok=True)
        with open(template_file, "w", encoding="utf-8") as f:
            f.write(resolved_content)

        # Parsed once and reused for both rendering and path substitution.
        variables = parse_template_variables()

        # Test regeneration in a temp directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            test_proj = Path(temp_dir) / "validation-proj"
            run_copier_quietly(str(script_dir), str(test_proj), variables)

            # Reverse of map_materialized_to_template_path: drop the ".jinja"
            # suffix, then render any Jinja expressions in the path itself.
            relative_template_path = str(template_file.relative_to(script_dir))
            materialized_path_str = render_jinja_string(
                relative_template_path.removesuffix(".jinja"),
                variables,
                script_dir,
            )

            materialized_file = test_proj / materialized_path_str
            if not materialized_file.exists():
                return False

            with open(materialized_file, "r", encoding="utf-8") as f:
                validation_actual = f.read()

            return validation_actual.strip() == expected_materialized_content.strip()

    except Exception:
        # Validation is best-effort: any failure means "not validated".
        return False

    finally:
        if original_content is not None:
            # Restore even when the original was an empty string.
            with open(template_file, "w", encoding="utf-8") as f:
                f.write(original_content)
        else:
            # The file did not exist before; do not leave the candidate behind.
            template_file.unlink(missing_ok=True)
def run_git_command(
    cmd: List[str], cwd: Optional[Path] = None
) -> subprocess.CompletedProcess[str]:
    """Run ``cmd`` as a subprocess, echoing it first, and return the completed process.

    Output is captured as text.  If the command exits with a non-zero status,
    the exit code and captured stdout/stderr are printed and the script exits
    with status 1.

    Args:
        cmd: Program and arguments to execute.
        cwd: Working directory for the command, or None for the current one.
    """
    command_line = " ".join(cmd)
    console.print(f"Running: {command_line}")

    try:
        return subprocess.run(
            cmd, cwd=cwd, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as failure:
        console.print(
            f"Command failed with exit code {failure.returncode}", style="bold red"
        )
        console.print(f"stdout: {failure.stdout}", style="bold yellow")
        console.print(f"stderr: {failure.stderr}", style="bold yellow")
        sys.exit(1)
def get_git_tracked_files(directory: Path, respect_gitignore: bool = True) -> set[Path]:
    """Return the relative paths of the files under ``directory`` that count for comparison.

    Args:
        directory: Directory to scan.
        respect_gitignore: When True, ask git which files are tracked or
            untracked-but-not-ignored; when False, walk the directory and
            return every file.

    Returns:
        Set of paths relative to ``directory``.  Files named
        ``.copier-answers.yml`` are always left out.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` fails (only when
            ``respect_gitignore`` is True).
    """
    # Files to always ignore (matched by base name).
    ignored_files = {".copier-answers.yml"}

    if not respect_gitignore:
        return {
            file_path.relative_to(directory)
            for file_path in directory.rglob("*")
            if file_path.is_file() and file_path.name not in ignored_files
        }

    # "-z" makes git emit NUL-terminated, unquoted paths.  Without it, paths
    # containing non-ASCII or special characters are printed quoted/escaped
    # and would never match a file on disk.
    result = subprocess.run(
        ["git", "ls-files", "-z", "--others", "--cached", "--exclude-standard"],
        cwd=directory,
        capture_output=True,
        encoding="utf-8",
        check=True,
    )

    tracked_files = set()
    for entry in result.stdout.split("\0"):
        if not entry:
            continue
        relative_path = Path(entry)
        # git may list index entries whose file was deleted on disk; keep only
        # paths that currently exist as files.
        if relative_path.name not in ignored_files and (
            directory / relative_path
        ).is_file():
            tracked_files.add(relative_path)

    return tracked_files
def compare_directories(expected_dir: Path, actual_dir: Path) -> List[str]:
    """Compare two directory trees and describe how they differ.

    Files in ``expected_dir`` are enumerated without consulting gitignore
    rules; files in ``actual_dir`` are enumerated respecting them.  A
    directory that does not exist is treated as empty.

    Args:
        expected_dir: Freshly generated reference tree.
        actual_dir: Working tree to check against the reference.

    Returns:
        Messages of the form ``"Missing file: <path>"`` (only in expected),
        ``"Extra file: <path>"`` (only in actual) and
        ``"Content differs: <path>"``, each group sorted by path.  Text files
        that differ only in trailing newlines are considered equal.
    """

    def _load_for_comparison(path: Path) -> object:
        # Text is compared with its trailing newlines collapsed to exactly
        # one.  Files that are not valid UTF-8 (binary assets) are compared
        # byte for byte instead of aborting the whole comparison.
        try:
            with open(path, "r", encoding="utf-8") as f:
                return f.read().rstrip("\n") + "\n"
        except UnicodeDecodeError:
            return path.read_bytes()

    expected_files = (
        get_git_tracked_files(expected_dir, respect_gitignore=False)
        if expected_dir.exists()
        else set()
    )
    actual_files = (
        get_git_tracked_files(actual_dir, respect_gitignore=True)
        if actual_dir.exists()
        else set()
    )

    # Sorting makes the report stable from run to run (set order is not).
    differences = [
        f"Missing file: {file_path}"
        for file_path in sorted(expected_files - actual_files)
    ]
    differences.extend(
        f"Extra file: {file_path}"
        for file_path in sorted(actual_files - expected_files)
    )

    for file_path in sorted(expected_files & actual_files):
        expected_data = _load_for_comparison(expected_dir / file_path)
        actual_data = _load_for_comparison(actual_dir / file_path)
        if expected_data != actual_data:
            differences.append(f"Content differs: {file_path}")

    return differences
def compare_with_expected_materialized(
    script_dir: Path, fix_mode: bool = False
) -> None:
    """Compare ``test-proj`` with a fresh render of the template; report or fix drift.

    The template is rendered into a temporary directory and compared with
    ``script_dir / "test-proj"``.  Each differing or extra file is classified:

    - non-``.jinja`` template files can be copied back verbatim;
    - ``.jinja`` template files with changed content are auto-resolved when
      ``attempt_chunk_based_jinja_resolution`` produces a validated result;
    - all other ``.jinja`` files are listed as needing manual resolution.

    "Missing file" differences are reported but no action is planned for them.

    Args:
        script_dir: Root directory of the copier template.
        fix_mode: When True, write the changes into the template; when False,
            only print what would be done.
    """
    # Entries: (materialized path, template path, file to copy from or None,
    # template file to write, auto-resolved content or None).
    files_to_copy = []
    # Entries: (materialized path, template path).
    files_needing_manual_fix = []

    with console.status(
        "[bold green]Generating expected materialized version from current template..."
    ):
        with tempfile.TemporaryDirectory() as temp_dir:
            expected_proj = Path(temp_dir) / "expected-proj"

            # Generate expected materialized version
            run_copier_quietly(
                str(script_dir),
                str(expected_proj),
                parse_template_variables(),
            )

            # Compare expected vs actual
            test_proj_dir = script_dir / "test-proj"
            differences = compare_directories(expected_proj, test_proj_dir)

            if not differences:
                console.print(
                    "✅ test-proj matches expected template output", style="bold green"
                )
                return

            console.print(
                f"\n❌ Found {len(differences)} differences between expected and actual:",
                style="bold red",
            )
            for diff in differences:
                console.print(f" {diff}")

            # For files that differ in content, show detailed diff and categorize
            console.print("\nDetailed differences:")
            for diff in differences:
                if diff.startswith("Content differs: "):
                    file_path = diff[len("Content differs: ") :]
                    expected_file = expected_proj / file_path
                    actual_file = test_proj_dir / file_path

                    # Determine corresponding template file path
                    template_file_path = map_materialized_to_template_path(
                        script_dir, str(file_path)
                    )
                    template_file = script_dir / template_file_path

                    # Read file contents for auto-resolution; unreadable or
                    # non-text files simply skip auto-resolution.
                    try:
                        with open(expected_file, "r", encoding="utf-8") as f:
                            expected_content = f.read()
                        with open(actual_file, "r", encoding="utf-8") as f:
                            actual_content = f.read()
                    except (UnicodeDecodeError, PermissionError):
                        expected_content = None
                        actual_content = None

                    console.print(f"\n--- Expected (from template): {file_path}")
                    console.print(f"+++ Actual (in test-proj): {file_path}")

                    # Use git diff for better output.  It exits with 1 when
                    # the files differ, so the exit status is not checked; the
                    # only failure handled is git being impossible to launch.
                    try:
                        result = subprocess.run(
                            [
                                "git",
                                "diff",
                                "--no-index",
                                str(expected_file),
                                str(actual_file),
                            ],
                            capture_output=True,
                            text=True,
                            cwd=script_dir,
                        )
                    except OSError:
                        # Fallback to basic diff indication
                        console.print(" (Files differ)")
                    else:
                        if result.stdout:
                            # Skip the first 4 lines (file headers) and show
                            # just the content diff.
                            for line in result.stdout.split("\n")[4:]:
                                if line.strip():
                                    console.print(f" {line}")

                    # Categorize for fixing
                    if template_file_path.endswith(".jinja"):
                        # Try auto-resolution first
                        auto_resolved_content = None
                        if expected_content and actual_content:
                            auto_resolved_content = (
                                attempt_chunk_based_jinja_resolution(
                                    template_file, expected_content, actual_content
                                )
                            )

                        if auto_resolved_content:
                            # The resolver only returns content that passed
                            # validation, so it is accepted as is.
                            if fix_mode:
                                console.print(
                                    f" ✓ Auto-resolved: {template_file_path}"
                                )
                            else:
                                console.print(
                                    f" ✓ Would auto-resolve: {template_file_path}"
                                )
                            files_to_copy.append(
                                (
                                    str(file_path),
                                    template_file_path,
                                    None,
                                    template_file,
                                    auto_resolved_content,
                                )
                            )
                        else:
                            files_needing_manual_fix.append(
                                (file_path, template_file_path)
                            )
                    else:
                        files_to_copy.append(
                            (
                                str(file_path),
                                template_file_path,
                                actual_file,
                                template_file,
                                None,
                            )
                        )

                elif diff.startswith("Extra file: "):
                    file_path = diff[len("Extra file: ") :]
                    actual_file = test_proj_dir / file_path

                    # Determine corresponding template file path
                    template_file_path = map_materialized_to_template_path(
                        script_dir, str(file_path)
                    )
                    template_file = script_dir / template_file_path

                    console.print(f"\nExtra file in test-proj: {file_path}")

                    # Categorize for fixing
                    if template_file_path.endswith(".jinja"):
                        # For extra files, we can't auto-resolve without expected content
                        files_needing_manual_fix.append((file_path, template_file_path))
                    else:
                        files_to_copy.append(
                            (
                                str(file_path),
                                template_file_path,
                                actual_file,
                                template_file,
                                None,
                            )
                        )

    # Provide guidance and optionally fix (outside temp directory)
    if fix_mode:
        # Actually fix the files
        if files_to_copy:
            console.print(f"\nCopying {len(files_to_copy)} files back to template:")
            for (
                relative_path,
                template_path,
                actual_file,
                template_file,
                auto_resolved_content,
            ) in files_to_copy:
                # Source and destination are separated so the two paths do not
                # run together in the output.
                console.print(f"Copying {relative_path} → {template_path}")
                template_file.parent.mkdir(parents=True, exist_ok=True)

                if auto_resolved_content:
                    # Write auto-resolved jinja content
                    with open(template_file, "w", encoding="utf-8") as f:
                        f.write(auto_resolved_content)
                else:
                    # Copy regular file
                    shutil.copy2(actual_file, template_file)

        if files_needing_manual_fix:
            console.print(
                f"\n⚠️ {len(files_needing_manual_fix)} templated files need manual resolution:"
            )
            for materialized_path, template_path in files_needing_manual_fix:
                console.print(f" {materialized_path} → {template_path}")
    else:
        # In check mode, just show what would happen
        if files_to_copy or files_needing_manual_fix:
            console.print("\nWould make the following changes:")
            if files_to_copy:
                console.print(f" Copy {len(files_to_copy)} files back to template")
            if files_needing_manual_fix:
                console.print(
                    f" {len(files_needing_manual_fix)} files need manual resolution"
                )
            # The CLI command that applies fixes is "check-template --fix".
            console.print("\nTo apply changes, run: check-template --fix")
def map_materialized_to_template_path(script_dir: Path, materialized_path: str) -> str:
    """Translate a path inside the generated project into its template path.

    Paths under ``src/<project_name_snake>/`` have that package directory
    replaced by the literal ``{{ project_name_snake }}`` placeholder.  If a
    file with the resulting path plus ``.jinja`` exists under ``script_dir``,
    that ``.jinja`` path is returned; otherwise the path without the suffix.

    Args:
        script_dir: Root directory of the copier template.
        materialized_path: Path relative to the generated project root.

    Returns:
        The template-relative path as a string.
    """
    parts: tuple[str, ...] = Path(materialized_path).parts
    snake_name = parse_template_variables().get("project_name_snake", "test_proj")

    # Special case: src/{computed_name}/ → src/{{ project_name_snake }}/
    inside_package = (
        len(parts) >= 2 and parts[0] == "src" and parts[1] == snake_name
    )
    if inside_package:
        candidate = str(Path("src", "{{ project_name_snake }}", *parts[2:]))
    else:
        candidate = materialized_path

    # Prefer the templated (.jinja) variant when it exists.
    templated = candidate + ".jinja"
    if (script_dir / templated).exists():
        return templated
    return candidate
# Root click group; the individual commands below register themselves on it.
@click.group()
def cli() -> None:
    """Template validation and fixing tools."""
def regenerate_test_proj(script_dir: Path) -> None:
    """Delete ``test-proj`` (if present) and render it again with copier.

    The answers recorded in the existing project are reused for the new
    render; if there is no existing project an empty answer set is passed.
    Afterwards ``.copier-answers.yml`` is restored from git, because rendering
    rewrites it with new revision info.

    Args:
        script_dir: Root directory of the copier template.
    """
    test_proj_dir: Path = script_dir / "test-proj"
    already_present = test_proj_dir.exists()

    # The answers live inside test-proj, so read them before removing it.
    variables = parse_template_variables() if already_present else {}

    if already_present:
        console.print(f"Deleting {test_proj_dir}")
        shutil.rmtree(test_proj_dir)
    else:
        console.print(f"Directory {test_proj_dir} does not exist")

    with console.status("[bold green]Running copier to regenerate test-proj..."):
        run_copier_quietly(str(script_dir), str(test_proj_dir), variables)

    answers_file = test_proj_dir / ".copier-answers.yml"
    if not answers_file.exists():
        return

    try:
        run_git_command(["git", "restore", str(answers_file)], cwd=script_dir)
    except SystemExit:
        # run_git_command exits on failure (e.g. the file is not tracked);
        # that is not fatal here, so carry on.
        pass
def get_script_dir_and_setup() -> Path:
    """Switch the working directory to the directory above this script's folder.

    Shared first step of every command.

    Returns:
        The directory that was made current.
    """
    root: Path = Path(__file__).parents[1]
    os.chdir(root)
    console.print(f"Working directory: {root}")
    return root
def ensure_test_proj_exists(script_dir: Path) -> Path:
    """Return the path of ``test-proj`` under ``script_dir``, exiting if it is absent.

    Args:
        script_dir: Root directory of the copier template.

    Returns:
        ``script_dir / "test-proj"`` when it exists.  Otherwise an error is
        printed and the script exits with status 1.
    """
    candidate: Path = script_dir / "test-proj"
    if candidate.exists():
        return candidate

    console.print(
        "Error: test-proj directory does not exist. Run 'regenerate' first.",
        style="bold red",
    )
    sys.exit(1)
@cli.command()
def regenerate() -> None:
    """Regenerate test-proj directory using copier."""
    script_dir: Path = get_script_dir_and_setup()

    # Refuse to run on a dirty work tree: regeneration deletes test-proj, and
    # `git status --porcelain` prints nothing only when the tree is clean.
    console.print("Checking for uncommitted changes...")
    pending_changes: str = run_git_command(
        ["git", "status", "--porcelain"], cwd=script_dir
    ).stdout
    if pending_changes.strip():
        console.print(
            "Error: Repository has uncommitted changes. Please commit or stash them first.",
            style="bold red",
        )
        console.print(pending_changes)
        sys.exit(1)

    regenerate_test_proj(script_dir)
    console.print("✓ test-proj regenerated")
@cli.command()
def check_regeneration() -> None:
    """Check if generated files match template (assumes test-proj already exists)."""
    script_dir: Path = get_script_dir_and_setup()
    regenerate_test_proj(script_dir)

    # After regenerating, any change git reports means the committed test-proj
    # is out of step with what the template produces.
    console.print("Checking generated files against template...")
    status: subprocess.CompletedProcess[str] = run_git_command(
        ["git", "status", "--porcelain"], cwd=script_dir
    )
    if not status.stdout.strip():
        console.print("✓ Generated files match template")
        return

    console.print("\n❌ Generated files do not match template!", style="bold red")
    console.print("\nFiles that differ:")
    console.print(status.stdout)

    console.print("\nDifferences:")
    diff_output: subprocess.CompletedProcess[str] = run_git_command(
        ["git", "diff"], cwd=script_dir
    )
    console.print(diff_output.stdout)

    console.print(
        "\nTo fix: If these changes look good, likely you just need to run regenerate and commit the changes.",
        style="bold red",
    )
    sys.exit(1)
def run_python_checks(test_proj_dir: Path, fix: bool) -> None:
    """Run the hatch validation script for the Python code in ``test_proj_dir``.

    Args:
        test_proj_dir: Directory the command is run in.
        fix: Run the ``all-fix`` script instead of ``all-check``.
    """
    hatch_script = "all-fix" if fix else "all-check"

    console.print("Running Python validation checks...")
    # run_git_command exits the script if the command fails, so reaching the
    # next line means the checks passed.
    run_git_command(["uv", "run", "hatch", "run", hatch_script], cwd=test_proj_dir)
    console.print("✓ Python checks passed")
@cli.command("check-python")
@click.option("--fix", is_flag=True, help="Fix formatting issues automatically.")
def check_python(fix: bool) -> None:
    """Run Python validation checks on test-proj using hatch."""
    # Move to the repository root first, then stop early if test-proj is absent.
    project_dir: Path = ensure_test_proj_exists(get_script_dir_and_setup())
    run_python_checks(project_dir, fix)
def run_javascript_checks(test_proj_dir: Path, fix: bool) -> None:
    """Run the npm validation script for the UI in ``test_proj_dir / "ui"``.

    Exits with status 1 if the ``ui`` directory is missing.

    Args:
        test_proj_dir: The materialized project directory.
        fix: Run the ``all-fix`` script instead of ``all-check``.
    """
    ui_dir: Path = test_proj_dir / "ui"
    if not ui_dir.exists():
        console.print("Error: test-proj/ui directory does not exist.", style="bold red")
        sys.exit(1)

    npm_script = "all-fix" if fix else "all-check"

    console.print("Running TypeScript validation checks...")
    # run_git_command exits the script on failure, so success is implied below.
    run_git_command(["npm", "run", npm_script], cwd=ui_dir)
    console.print("✓ TypeScript checks passed")
@cli.command("check-javascript")
@click.option("--fix", is_flag=True, help="Fix formatting issues automatically.")
def check_javascript(fix: bool) -> None:
    """Run TypeScript and format validation checks on test-proj/ui using npm."""
    # Move to the repository root first, then stop early if test-proj is absent.
    project_dir: Path = ensure_test_proj_exists(get_script_dir_and_setup())
    run_javascript_checks(project_dir, fix)
@cli.command()
@click.option(
    "--fix",
    is_flag=True,
    help="Fix template files by copying back changes from materialized test-proj.",
)
@click.option(
    "--fix-format",
    is_flag=True,
    help="Run Python and JavaScript formatters before fixing template files. Implies --fix.",
)
def check_template(fix: bool, fix_format: bool) -> None:
    """Compare test-proj with the template and optionally copy changes back.

    Compares test-proj with what the current template would generate.
    Without options the differences are only reported. Pass --fix to apply
    them to the template, or --fix-format to run the formatters first and
    then apply them.
    """
    script_dir: Path = get_script_dir_and_setup()

    # --fix-format implies --fix.
    if fix_format:
        fix = True

    # Exits with an error if test-proj has not been generated yet.
    test_proj_dir: Path = ensure_test_proj_exists(script_dir)

    # Format first so formatter output is part of what gets copied back.
    if fix_format:
        run_python_checks(test_proj_dir, fix=True)
        run_javascript_checks(test_proj_dir, fix=True)

    # Use expected materialized comparison approach
    compare_with_expected_materialized(script_dir, fix_mode=fix)
# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()
-5
View File
@@ -1,5 +0,0 @@
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
_commit: c778ba5
_src_path: .
project_name: test-proj
project_title: Test Proj
-2
View File
@@ -1,2 +0,0 @@
# copy to .env and place any needed secrets here. LLAMA_CLOUD_API_KEY will be automatically set
# OPENAI_API_KEY=sk-xxx
-3
View File
@@ -1,3 +0,0 @@
.env
__pycache__
workflows.db
-33
View File
@@ -1,33 +0,0 @@
# Data Extraction and Ingestion
This is a starter for a [`llama_deploy`](https://github.com/run-llama/cloud_llama_deploy) powered app.
See its [getting started guide](https://github.com/run-llama/cloud_llama_deploy/tree/main/docs/guides/01-getting-started.md)
for more info on how to run this project.
The backend contains a single workflow that runs LlamaCloud Extraction, given your schema. The frontend exposes an
extraction review UI, where you can review and correct extractions.
## Customizing the schema.
The starter contains a placeholder `MySchema` that is used for extraction. See [`schemas.py`](./src/test_proj/schemas.py).
You should customize this `schemas.py` for your use case to modify the extracted data. You can also rename the schema from `MySchema` to
something more appropriate for your use case. Do a find and replace on "MySchema" to also fix the frontend references.
The frontend has a copy of the schema as a json schema, that it uses to introspect and generate an editing UI. Run `uv run export-types` to regenerate the frontend json schema.
## Customizing the application
This is meant to just be a starting place. You can add more workflows, and trigger them from the UI. For example, you could
add functionality to sync to a downstream data sink to export the corrected data after review. Or you could add a workflow
that monitors a data source, and automatically triggers the extraction against the file.
### Running Workflows
The core value of this template is good extraction. The main python code is in the `src` directory.
Workflows can be triggered from the UI using `useWorkflow` react hooks from the `@llamaindex/ui` library.
You can also add a `if __name__ == "__main__":` handler to individual workflows to run and debug them directly.
`process_file.py` has a `__main__` handler that will upload a `test.pdf` from your current working directory so you
can test your extraction directly. Note, while `llamactl serve` will inject your LlamaCloud API key, you will need
to set a `LLAMA_CLOUD_API_KEY` in your `.env` file or set an env var to run the `__main__` handler directly.
-53
View File
@@ -1,53 +0,0 @@
[project]
name = "test-proj"
version = "0.1.0"
description = "Extracts data"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"llama-cloud-services>=0.6.69",
"llama-index-workflows>=2.2.0,<3.0.0",
"python-dotenv>=1.1.0",
"jsonref>=1.1.0",
"click>=8.2.1,<8.3.0",
"httpx>=0.28.1",
"llama-index-core>=0.14.0",
]
[project.scripts]
export-types = "test_proj.export_types:export_types"
[dependency-groups]
dev = [
"ruff>=0.11.10",
"typescript>=0.0.12",
"ty>=0.0.1a16",
"pytest>=8.4.1",
"hatch>=1.14.1",
"llamactl>=0.3.0"
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.envs.default.scripts]
"format" = "ruff format ."
"format-check" = "ruff format --check ."
"lint" = "ruff check --fix ."
"lint-check" = ["ruff check ."]
typecheck = "ty check src"
test = "pytest"
"all-check" = ["format-check", "lint-check", "test"]
"all-fix" = ["format", "lint", "test"]
[tool.llamadeploy]
env_files = [".env"]
llama_cloud = true
[tool.llamadeploy.workflows]
process-file = "test_proj.process_file:workflow"
[tool.llamadeploy.ui]
directory = "ui"
View File
-75
View File
@@ -1,75 +0,0 @@
import functools
import os
import httpx
import dotenv
from llama_cloud_services import ExtractionAgent, LlamaExtract
from llama_cloud_services.extract import ExtractConfig, ExtractMode
from llama_cloud.core.api_error import ApiError
from llama_cloud_services.beta.agent_data import AsyncAgentDataClient, ExtractedData
from llama_cloud.client import AsyncLlamaCloud
from .schemas import MySchema
# Load variables from a local .env file (if any) before reading the settings below.
dotenv.load_dotenv()

# Deployed agents may infer their name from the deployment name.
# Note: make sure that an agent deployment with this name actually exists,
# otherwise calls to get or set data will fail. For local development you may
# need to change the fallback after `or` below to a name that exists.
agent_name = os.getenv("LLAMA_DEPLOY_DEPLOYMENT_NAME")
agent_name_or_default = agent_name or "test-proj"
# Required for all LlamaCloud calls; raises KeyError at import time when unset.
api_key = os.environ["LLAMA_CLOUD_API_KEY"]
# Optional override, in case of running against an environment other than production.
base_url = os.getenv("LLAMA_CLOUD_BASE_URL")
# Name of the collection passed to the agent data client for extracted records.
extracted_data_collection = "test-proj"
# Optional project id; when set it is passed to the extract client and sent
# as the `Project-Id` header by the LlamaCloud client.
project_id = os.getenv("LLAMA_DEPLOY_PROJECT_ID")
@functools.lru_cache(maxsize=None)
def get_extract_agent() -> ExtractionAgent:
    """Return the extraction agent named `agent_name_or_default`, creating it if missing.

    The result is cached, so the lookup/creation happens once per process.
    An `ApiError` with any status other than 404 is re-raised.
    """
    client = LlamaExtract(api_key=api_key, base_url=base_url, project_id=project_id)
    settings = ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        system_prompt=None,
        # advanced
        use_reasoning=False,
        cite_sources=False,
        confidence_scores=True,
    )

    try:
        agent = client.get_agent(agent_name_or_default)
        # NOTE(review): schema and config are assigned on the returned object
        # and no explicit save call is made here; confirm that is intended.
        agent.data_schema = MySchema
        agent.config = settings
        return agent
    except ApiError as err:
        if err.status_code != 404:
            raise

    # Only reached on a 404: the agent does not exist yet.
    return client.create_agent(
        name=agent_name_or_default, data_schema=MySchema, config=settings
    )
@functools.lru_cache(maxsize=None)
def get_data_client() -> AsyncAgentDataClient:
    """Return the cached agent data client for the extracted-data collection."""
    # update MySchema for your schema, but retain the ExtractedData container
    record_type = ExtractedData[MySchema]
    return AsyncAgentDataClient(
        deployment_name=agent_name,
        collection=extracted_data_collection,
        type=record_type,
        client=get_llama_cloud_client(),
    )
@functools.lru_cache(maxsize=None)
def get_llama_cloud_client():
    """Return the cached async LlamaCloud client.

    The underlying HTTP client uses a 60 second timeout and sends a
    `Project-Id` header only when `project_id` is set.
    """
    extra_headers = {"Project-Id": project_id} if project_id else None
    http_client = httpx.AsyncClient(timeout=60, headers=extra_headers)
    return AsyncLlamaCloud(
        base_url=base_url,
        token=api_key,
        httpx_client=http_client,
    )
-80
View File
@@ -1,80 +0,0 @@
"""
Script to export pydantic types from a python file (default "schemas.py") to json schemas and then to typescript interfaces.
For sharing types precisely between python and typescript
"""
import importlib.util
import inspect
import json
import os
import shutil
import subprocess
import sys
from pathlib import Path
from types import ModuleType
import jsonref
from pydantic import BaseModel
import click
def run_command(cmd: str):
    """Run `cmd` through the shell and exit the process if it fails.

    On a non-zero return code the failing command is printed to stderr and
    the interpreter exits with that same code. Returns None on success.
    """
    exit_code = subprocess.run(cmd, shell=True).returncode
    if exit_code == 0:
        return
    print(f"Command failed: {cmd}", file=sys.stderr)
    sys.exit(exit_code)
@click.command()
@click.option(
    "--schema-file",
    default="schemas.py",
    help="The name of the model file to export types from",
)
def export_types(schema_file: str):
    # The schema file is looked up next to this module; the app root is two
    # levels above this module's directory.
    package_dir = Path(__file__).parent
    app_path = package_dir.parent.parent
    print("Exporting types...")

    schema_path = package_dir / schema_file
    if not schema_path.exists():
        raise click.BadParameter(f"Schema file '{schema_file}' not found in app")
    print(f"Exporting types from {schema_path}...")

    # Start from an empty output directory so stale schemas do not linger.
    output_dir = app_path / "ui" / "src" / "schemas"
    if output_dir.exists():
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    export_schemas(schema_path, output_dir)
    generate_typescript_interfaces(output_dir)
def generate_typescript_interfaces(schema_dir: Path):
    """Generate TypeScript interfaces from every `*.json` schema in `schema_dir`.

    Runs `json-schema-to-typescript` via `npx`, writing the output into the
    same directory. Both paths are shell-quoted so directories containing
    spaces or quote characters do not break the command.
    """
    import shlex

    # The glob stays quoted so the tool, not the shell, expands it.
    input_glob = shlex.quote(str(schema_dir / "*.json"))
    output_dir = shlex.quote(str(schema_dir))
    run_command(
        f"npx -y json-schema-to-typescript@15.0.4 -i {input_glob} -o {output_dir} --additionalProperties=false"
    )
def load_module_from_path(module_name: str, file_path: Path) -> ModuleType:
    """Import the Python file at `file_path` as a module named `module_name`.

    The module is registered in `sys.modules` before it is executed. If
    execution fails, the registration is rolled back (restoring any module
    previously registered under that name) and the error is re-raised.

    Raises:
        ValueError: if no import spec or loader can be built for the file.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ValueError(f"Failed to load module from {file_path}")
    module = importlib.util.module_from_spec(spec)

    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module
def export_schemas(py_file: Path, output_dir: Path):
    """Write a `<ClassName>.json` JSON schema for each pydantic model found in `py_file`.

    Every `BaseModel` subclass that is a member of the loaded module (other
    than `BaseModel` itself) is exported, with `$ref`s replaced inline.
    """
    file_name = os.path.basename(py_file)
    module = load_module_from_path(os.path.splitext(file_name)[0], py_file)
    os.makedirs(output_dir, exist_ok=True)

    for name, obj in inspect.getmembers(module):
        is_model = (
            inspect.isclass(obj) and issubclass(obj, BaseModel) and obj is not BaseModel
        )
        if not is_model:
            continue
        flattened = jsonref.replace_refs(obj.model_json_schema(), proxies=False)
        target = os.path.join(output_dir, f"{name}.json")
        with open(target, "w") as handle:
            handle.write(json.dumps(flattened, indent=2))
        print(f"Exported {name} to {name}.json")
# Allow running this module directly as a script.
if __name__ == "__main__":
    export_types()
-208
View File
@@ -1,208 +0,0 @@
import asyncio
import hashlib
import logging
import os
from pathlib import Path
import tempfile
from typing import Any, Literal
import httpx
from llama_cloud import ExtractRun
from llama_cloud_services.extract import SourceText
from llama_cloud_services.beta.agent_data import ExtractedData, InvalidExtractionData
from workflows import Context, Workflow, step
from workflows.events import Event, StartEvent, StopEvent
from workflows.retry_policy import ConstantDelayRetryPolicy
from .config import get_llama_cloud_client, get_data_client, get_extract_agent
from .schemas import MySchema
logger = logging.getLogger(__name__)
# Start event of the workflow: carries the id of the file to process.
class FileEvent(StartEvent):
    file_id: str
# Emitted by `run_file` and consumed by `download_file`.
class DownloadFileEvent(Event):
    file_id: str
# Emitted once the file has been written to local disk.
class FileDownloadedEvent(Event):
    file_id: str
    # Local path the file was downloaded to.
    file_path: str
    # Name taken from the file's metadata.
    filename: str
# Status message written to the workflow's event stream.
# NOTE(review): presumably rendered as a toast by the UI; confirm against the frontend.
class UIToast(Event):
    level: Literal["info", "warning", "error"]
    message: str
# Extraction result built against `MySchema`.
class ExtractedEvent(Event):
    data: ExtractedData[MySchema]
# Extraction result that raised `InvalidExtractionData`; carries the invalid item as a plain dict.
class ExtractedInvalidEvent(Event):
    data: ExtractedData[dict[str, Any]]
class ProcessFileWorkflow(Workflow):
    """
    Given a file id, this workflow will process a single file through the custom extraction logic.

    Steps: `run_file` -> `download_file` -> `process_file` -> `record_extracted_data`.
    Each step is retried up to 3 attempts with a constant 10 delay between attempts.
    """

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def run_file(self, event: FileEvent) -> DownloadFileEvent:
        """Entry step: forward the file id to the download step."""
        logger.info(f"Running file {event.file_id}")
        return DownloadFileEvent(file_id=event.file_id)

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def download_file(
        self, event: DownloadFileEvent, ctx: Context
    ) -> FileDownloadedEvent:
        """Download the file reference from the cloud storage"""
        try:
            file_metadata = await get_llama_cloud_client().files.get_file(
                id=event.file_id
            )
            file_url = await get_llama_cloud_client().files.read_file_content(
                event.file_id
            )

            temp_dir = tempfile.gettempdir()
            filename = file_metadata.name
            # Only the final path component is used on disk, so a name that
            # contains directory separators cannot escape the temp directory.
            local_name = os.path.basename(filename) or event.file_id
            file_path = os.path.join(temp_dir, local_name)

            logger.info(f"Downloading file {file_url.url} to {file_path}")
            # Close the HTTP client when done instead of leaking its connections.
            async with httpx.AsyncClient() as client:
                async with client.stream("GET", file_url.url) as response:
                    # Fail instead of saving an error response body as the file.
                    response.raise_for_status()
                    with open(file_path, "wb") as f:
                        async for chunk in response.aiter_bytes():
                            f.write(chunk)
            logger.info(f"Downloaded file {file_url.url} to {file_path}")
            return FileDownloadedEvent(
                file_id=event.file_id, file_path=file_path, filename=filename
            )
        except Exception as e:
            logger.error(f"Error downloading file {event.file_id}: {e}", exc_info=True)
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error downloading file {event.file_id}: {e}",
                )
            )
            raise

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def process_file(
        self, event: FileDownloadedEvent, ctx: Context
    ) -> ExtractedEvent | ExtractedInvalidEvent:
        """Runs the extraction against the file"""
        try:
            agent = get_extract_agent()
            # track the content of the file, so as to be able to de-duplicate
            file_content = Path(event.file_path).read_bytes()
            file_hash = hashlib.sha256(file_content).hexdigest()
            source_text = SourceText(
                file=event.file_path,
                filename=event.filename,
            )
            logger.info(f"Extracting data from file {event.filename}")
            ctx.write_event_to_stream(
                UIToast(
                    level="info", message=f"Extracting data from file {event.filename}"
                )
            )
            extracted_result: ExtractRun = await agent.aextract(source_text)
            try:
                logger.info(f"Extracted data: {extracted_result}")
                data = ExtractedData.from_extraction_result(
                    result=extracted_result,
                    schema=MySchema,
                    file_hash=file_hash,
                )
                return ExtractedEvent(data=data)
            except InvalidExtractionData as e:
                # Invalid data is still passed on so it can be recorded.
                logger.error(f"Error validating extracted data: {e}", exc_info=True)
                return ExtractedInvalidEvent(data=e.invalid_item)
        except Exception as e:
            logger.error(
                f"Error extracting data from file {event.filename}: {e}",
                exc_info=True,
            )
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error extracting data from file {event.filename}: {e}",
                )
            )
            raise

    @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10))
    async def record_extracted_data(
        self, event: ExtractedEvent | ExtractedInvalidEvent, ctx: Context
    ) -> StopEvent:
        """Records the extracted data to the agent data API"""
        try:
            # remove past data when reprocessing the same file
            if event.data.file_hash:
                existing_data = await get_data_client().search(
                    filter={
                        "file_hash": {
                            "eq": event.data.file_hash,
                        },
                    },
                )
                if existing_data.items:
                    logger.info(
                        f"Removing past data for file {event.data.file_name} with hash {event.data.file_hash}"
                    )
                    await asyncio.gather(
                        *[
                            get_data_client().delete_item(item.id)
                            for item in existing_data.items
                        ]
                    )
            # finally, save the new data
            item_id = await get_data_client().create_item(event.data)
            # Report success only after the item has actually been saved.
            logger.info(f"Recorded extracted data for file {event.data.file_name}")
            ctx.write_event_to_stream(
                UIToast(
                    level="info",
                    message=f"Recorded extracted data for file {event.data.file_name}",
                )
            )
            return StopEvent(
                result=item_id.id,
            )
        except Exception as e:
            logger.error(
                f"Error recording extracted data for file {event.data.file_name}: {e}",
                exc_info=True,
            )
            ctx.write_event_to_stream(
                UIToast(
                    level="error",
                    message=f"Error recording extracted data for file {event.data.file_name}: {e}",
                )
            )
            raise
# Module-level workflow instance; this is what `process-file` points at in
# pyproject.toml. No timeout is applied to a run.
workflow = ProcessFileWorkflow(timeout=None)

if __name__ == "__main__":
    # Debug entry point: upload ./test.pdf and run the workflow against it.
    from dotenv import load_dotenv

    load_dotenv()
    logging.basicConfig(level=logging.INFO)

    async def main():
        # Close the local file handle once the upload has finished.
        with Path("test.pdf").open("rb") as pdf:
            file = await get_llama_cloud_client().files.upload_file(upload_file=pdf)
        await workflow.run(start_event=FileEvent(file_id=file.id))

    asyncio.run(main())
-7
View File
@@ -1,7 +0,0 @@
from pydantic import BaseModel
# Rename and extend this, and then regenerate the js types with `uv run export-types`
# Additional schemas can be added here to share them between python and typescript
class MySchema(BaseModel):
hello: str
-2
View File
@@ -1,2 +0,0 @@
def test_placeholder():
pass
-43
View File
@@ -1,43 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions
# testing
/coverage
# next.js
/.next/
/out/
/dist/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# env files (can opt-in for committing if needed)
.env*
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
-7
View File
@@ -1,7 +0,0 @@
# Data Extraction UI
This is a simple Vite + React template that builds on the `@llamaindex/ui` component library
for displaying tables of extracted data.
Ideally, run this with `llamactl` in the parent directory (see [README.md](../README.md)).
You can also run it standalone with `npm run dev`, but workflow integrations will not work.
-21
View File
@@ -1,21 +0,0 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "new-york",
"rsc": true,
"tsx": true,
"tailwind": {
"config": "",
"css": "src/index.css",
"baseColor": "zinc",
"cssVariables": true,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils",
"ui": "@/components/ui",
"lib": "@/lib",
"hooks": "@/hooks"
},
"iconLibrary": "lucide"
}
-12
View File
@@ -1,12 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Test Proj</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
-45
View File
@@ -1,45 +0,0 @@
{
"name": "test-proj-ui",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"preview": "vite preview",
"lint": "tsc --noEmit",
"format": "prettier --write src",
"format-check": "prettier --check src",
"all-check": "pnpm i && pnpm run lint && pnpm run format-check && pnpm run build",
"all-fix": "pnpm i && pnpm run lint && pnpm run format && pnpm run build"
},
"dependencies": {
"@babel/runtime": "^7.27.6",
"@lezer/highlight": "^1.2.1",
"@llamaindex/ui": "^2.1.2",
"@radix-ui/themes": "^3.2.1",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"llama-cloud-services": "^0.3.4",
"lucide-react": "^0.514.0",
"react": "^18.3.0",
"react-dom": "^18.3.0",
"react-router-dom": "^6.30.0",
"sonner": "^2.0.5",
"tw-animate-css": "^1.3.5"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.1.10",
"@types/node": "^20",
"@types/react": "^19",
"@types/react-dom": "^19",
"@vitejs/plugin-react": "^4.3.4",
"postcss": "^8.5.5",
"prettier": "^3.6.2",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.8",
"typescript": "^5",
"vite": "^6.0.5"
},
"packageManager": "pnpm@10.11.1+sha512.e519b9f7639869dc8d5c3c5dfef73b3f091094b0a006d7317353c72b124e80e1afd429732e28705ad6bfa1ee879c1fce46c128ccebd3192101f43dd67c667912"
}
-7
View File
@@ -1,7 +0,0 @@
const config = {
plugins: {
"@tailwindcss/postcss": {},
},
};
export default config;
-71
View File
@@ -1,71 +0,0 @@
import React from "react";
import { Routes, Route } from "react-router-dom";
import { Theme } from "@radix-ui/themes";
import {
Breadcrumb,
BreadcrumbItem,
BreadcrumbList,
BreadcrumbSeparator,
ApiProvider,
} from "@llamaindex/ui";
import { Link } from "react-router-dom";
import { Toaster } from "@llamaindex/ui";
import { useToolbar, ToolbarProvider } from "@/lib/ToolbarContext";
import { clients } from "@/lib/client";
// Import pages
import HomePage from "./pages/HomePage";
import ItemPage from "./pages/ItemPage";
/**
 * Root component. Wraps the app in the Radix `Theme`, the `ApiProvider`
 * (configured with the shared `clients`) and the `ToolbarProvider`, then
 * renders the toolbar above the routed pages.
 *
 * Routes: `/` renders `HomePage`; `/item/:itemId` renders `ItemPage`.
 */
export default function App() {
  return (
    <Theme>
      <ApiProvider clients={clients}>
        <ToolbarProvider>
          <div className="grid grid-rows-[auto_1fr] h-screen">
            <Toolbar />
            <main className="overflow-auto">
              <Routes>
                <Route path="/" element={<HomePage />} />
                <Route path="/item/:itemId" element={<ItemPage />} />
              </Routes>
            </main>
          </div>
          <Toaster />
        </ToolbarProvider>
      </ApiProvider>
    </Theme>
  );
}
/**
 * Sticky page header. Renders the breadcrumbs and buttons currently held in
 * the toolbar context.
 *
 * A breadcrumb is rendered as a link only when it has an `href` and is not
 * marked as the current page; otherwise it is plain text. Separators are
 * placed before every item except the first.
 */
const Toolbar = () => {
  const { buttons, breadcrumbs } = useToolbar();
  return (
    <header className="sticky top-0 z-50 flex h-16 shrink-0 items-center gap-2 border-b px-4 bg-white/95 backdrop-blur supports-[backdrop-filter]:bg-white/60">
      <Breadcrumb>
        <BreadcrumbList>
          {breadcrumbs.map((item, index) => (
            <React.Fragment key={index}>
              {index > 0 && <BreadcrumbSeparator />}
              <BreadcrumbItem>
                {item.href && !item.isCurrentPage ? (
                  <Link to={item.href} className="font-medium text-base">
                    {item.label}
                  </Link>
                ) : (
                  <span
                    className={`font-medium ${index === 0 ? "text-base" : ""}`}
                  >
                    {item.label}
                  </span>
                )}
              </BreadcrumbItem>
            </React.Fragment>
          ))}
        </BreadcrumbList>
      </Breadcrumb>
      {buttons}
    </header>
  );
};
-120
View File
@@ -1,120 +0,0 @@
@import "tailwindcss";
@import "tw-animate-css";
@custom-variant dark (&:is(.dark *));
@theme inline {
--radius-sm: calc(var(--radius) - 4px);
--radius-md: calc(var(--radius) - 2px);
--radius-lg: var(--radius);
--radius-xl: calc(var(--radius) + 4px);
--color-background: var(--background);
--color-foreground: var(--foreground);
--color-card: var(--card);
--color-card-foreground: var(--card-foreground);
--color-popover: var(--popover);
--color-popover-foreground: var(--popover-foreground);
--color-primary: var(--primary);
--color-primary-foreground: var(--primary-foreground);
--color-secondary: var(--secondary);
--color-secondary-foreground: var(--secondary-foreground);
--color-muted: var(--muted);
--color-muted-foreground: var(--muted-foreground);
--color-accent: var(--accent);
--color-accent-foreground: var(--accent-foreground);
--color-destructive: var(--destructive);
--color-border: var(--border);
--color-input: var(--input);
--color-ring: var(--ring);
--color-chart-1: var(--chart-1);
--color-chart-2: var(--chart-2);
--color-chart-3: var(--chart-3);
--color-chart-4: var(--chart-4);
--color-chart-5: var(--chart-5);
--color-sidebar: var(--sidebar);
--color-sidebar-foreground: var(--sidebar-foreground);
--color-sidebar-primary: var(--sidebar-primary);
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
--color-sidebar-accent: var(--sidebar-accent);
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
--color-sidebar-border: var(--sidebar-border);
--color-sidebar-ring: var(--sidebar-ring);
}
:root {
--radius: 0.625rem;
--card: oklch(1 0 0);
--card-foreground: oklch(0.141 0.005 285.823);
--popover: oklch(1 0 0);
--popover-foreground: oklch(0.141 0.005 285.823);
--primary: oklch(0.21 0.006 285.885);
--primary-foreground: oklch(0.985 0 0);
--secondary: oklch(0.967 0.001 286.375);
--secondary-foreground: oklch(0.21 0.006 285.885);
--muted: oklch(0.967 0.001 286.375);
--muted-foreground: oklch(0.552 0.016 285.938);
--accent: oklch(0.967 0.001 286.375);
--accent-foreground: oklch(0.21 0.006 285.885);
--destructive: oklch(0.577 0.245 27.325);
--border: oklch(0.92 0.004 286.32);
--input: oklch(0.92 0.004 286.32);
--ring: oklch(0.705 0.015 286.067);
--chart-1: oklch(0.646 0.222 41.116);
--chart-2: oklch(0.6 0.118 184.704);
--chart-3: oklch(0.398 0.07 227.392);
--chart-4: oklch(0.828 0.189 84.429);
--chart-5: oklch(0.769 0.188 70.08);
--sidebar: oklch(0.985 0 0);
--sidebar-foreground: oklch(0.141 0.005 285.823);
--sidebar-primary: oklch(0.21 0.006 285.885);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.967 0.001 286.375);
--sidebar-accent-foreground: oklch(0.21 0.006 285.885);
--sidebar-border: oklch(0.92 0.004 286.32);
--sidebar-ring: oklch(0.705 0.015 286.067);
--background: oklch(1 0 0);
--foreground: oklch(0.141 0.005 285.823);
}
.dark {
--background: oklch(0.141 0.005 285.823);
--foreground: oklch(0.985 0 0);
--card: oklch(0.21 0.006 285.885);
--card-foreground: oklch(0.985 0 0);
--popover: oklch(0.21 0.006 285.885);
--popover-foreground: oklch(0.985 0 0);
--primary: oklch(0.92 0.004 286.32);
--primary-foreground: oklch(0.21 0.006 285.885);
--secondary: oklch(0.274 0.006 286.033);
--secondary-foreground: oklch(0.985 0 0);
--muted: oklch(0.274 0.006 286.033);
--muted-foreground: oklch(0.705 0.015 286.067);
--accent: oklch(0.274 0.006 286.033);
--accent-foreground: oklch(0.985 0 0);
--destructive: oklch(0.704 0.191 22.216);
--border: oklch(1 0 0 / 10%);
--input: oklch(1 0 0 / 15%);
--ring: oklch(0.552 0.016 285.938);
--chart-1: oklch(0.488 0.243 264.376);
--chart-2: oklch(0.696 0.17 162.48);
--chart-3: oklch(0.769 0.188 70.08);
--chart-4: oklch(0.627 0.265 303.9);
--chart-5: oklch(0.645 0.246 16.439);
--sidebar: oklch(0.21 0.006 285.885);
--sidebar-foreground: oklch(0.985 0 0);
--sidebar-primary: oklch(0.488 0.243 264.376);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.274 0.006 286.033);
--sidebar-accent-foreground: oklch(0.985 0 0);
--sidebar-border: oklch(1 0 0 / 10%);
--sidebar-ring: oklch(0.552 0.016 285.938);
}
@layer base {
* {
@apply border-border outline-ring/50;
}
body {
@apply bg-background text-foreground;
}
}
-41
View File
@@ -1,41 +0,0 @@
import React from "react";
import { APP_TITLE } from "./config";
/** One entry in the toolbar's breadcrumb trail. */
export interface BreadcrumbItem {
  /** Text shown for this entry. */
  label: string;
  /** Link target; when omitted the entry is rendered as plain text. */
  href?: string;
  /** When true the entry is rendered as plain text even if `href` is set. */
  isCurrentPage?: boolean;
}
/**
 * Context holding the toolbar's buttons and breadcrumbs together with their
 * setters. The default value (used when no provider is mounted) has empty
 * lists and no-op setters.
 */
export const ToolbarCtx = React.createContext<{
  buttons: React.ReactNode[];
  setButtons: (fn: (prev: React.ReactNode[]) => React.ReactNode[]) => void;
  breadcrumbs: BreadcrumbItem[];
  setBreadcrumbs: (items: BreadcrumbItem[]) => void;
}>({
  buttons: [],
  setButtons: () => {},
  breadcrumbs: [],
  setBreadcrumbs: () => {},
});
/**
 * Provides toolbar state (buttons and breadcrumbs) to its subtree.
 *
 * Breadcrumbs start as a single entry linking the app title to `/`.
 * The context value is memoised so consumers only see a new value when the
 * buttons or breadcrumbs actually change, not on every provider render.
 */
export const ToolbarProvider = ({
  children,
}: {
  children: React.ReactNode;
}) => {
  const [buttons, setButtons] = React.useState<React.ReactNode[]>([]);
  const [breadcrumbs, setBreadcrumbs] = React.useState<BreadcrumbItem[]>([
    { label: APP_TITLE, href: "/" },
  ]);

  // The state setters are stable, so only the two state values are dependencies.
  const value = React.useMemo(
    () => ({ buttons, setButtons, breadcrumbs, setBreadcrumbs }),
    [buttons, breadcrumbs],
  );

  return <ToolbarCtx.Provider value={value}>{children}</ToolbarCtx.Provider>;
};
/** Returns the current toolbar context value (buttons, breadcrumbs and their setters). */
export const useToolbar = () => React.useContext(ToolbarCtx);
-45
View File
@@ -1,45 +0,0 @@
import { MySchema } from "@/schemas/MySchema";
import { ExtractedData } from "llama-cloud-services/beta/agent";
import {
ApiClients,
createWorkflowsClient,
createWorkflowsConfig,
} from "@llamaindex/ui";
import { createCloudAgentClient, cloudApiClient } from "@llamaindex/ui";
import { AGENT_NAME, EXTRACTED_DATA_COLLECTION } from "./config";
// Optional settings read from `import.meta.env`; each one is checked for
// truthiness before it is used below.
const platformToken = import.meta.env.VITE_LLAMA_CLOUD_API_KEY;
const apiBaseUrl = import.meta.env.VITE_LLAMA_CLOUD_BASE_URL;
const projectId = import.meta.env.VITE_LLAMA_DEPLOY_PROJECT_ID;

// Configure the platform client
cloudApiClient.setConfig({
  ...(apiBaseUrl && { baseUrl: apiBaseUrl }),
  headers: {
    // Optionally use a backend API token scoped to a project (for local development).
    ...(platformToken && { authorization: `Bearer ${platformToken}` }),
    // This header is required for requests to correctly scope to the agent's project
    // when authenticating with a user cookie
    ...(projectId && { "Project-Id": projectId }),
  },
});

// Agent data client typed to the extracted-data records of this app.
const agentClient = createCloudAgentClient<ExtractedData<MySchema>>({
  client: cloudApiClient,
  windowUrl: typeof window !== "undefined" ? window.location.href : undefined,
  collection: EXTRACTED_DATA_COLLECTION,
});

// NOTE(review): AGENT_NAME comes from a build-time define; if it is unset the
// base URL would contain the text "undefined". Confirm it is always provided.
const workflowsClient = createWorkflowsClient(
  createWorkflowsConfig({
    baseUrl: `/deployments/${AGENT_NAME}/`,
  }),
);

// Bundle of clients handed to `ApiProvider`.
const clients: ApiClients = {
  workflowsClient: workflowsClient,
  cloudApiClient: cloudApiClient,
  agentDataClient: agentClient,
};

export { clients, agentClient };
-3
View File
@@ -1,3 +0,0 @@
/** Title used as the root breadcrumb label. */
export const APP_TITLE = "Test Proj";
/** Collection name passed to the agent data client. */
export const EXTRACTED_DATA_COLLECTION = "test-proj";
/** Deployment name injected at build time; used to build the workflows base URL. */
export const AGENT_NAME = import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME;
-6
View File
@@ -1,6 +0,0 @@
import { clsx, type ClassValue } from "clsx";
import { twMerge } from "tailwind-merge";
/** Combines class values with `clsx`, then passes the result through `twMerge`. */
export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}
-14
View File
@@ -1,14 +0,0 @@
import { StrictMode } from "react";
import { createRoot } from "react-dom/client";
import { HashRouter } from "react-router-dom";
import App from "./App";
import "@llamaindex/ui/styles.css";
import "./index.css";
// Mount the app into the element with id "root". The non-null assertion
// assumes the host page always contains that element.
createRoot(document.getElementById("root")!).render(
  <StrictMode>
    <HashRouter>
      <App />
    </HashRouter>
  </StrictMode>,
);
@@ -1,23 +0,0 @@
.main {
padding: 1rem;
}
.grid {
display: flex;
flex-direction: row;
gap: 1rem;
margin-bottom: 1rem;
& > * {
flex: 1;
}
}
.commandBar {
display: flex;
justify-content: flex-end;
margin-bottom: 1rem;
}
.progressBar {
margin-bottom: 1rem;
}
-88
View File
@@ -1,88 +0,0 @@
import {
ItemCount,
WorkflowTrigger,
WorkflowProgressBar,
ExtractedDataItemGrid,
useWorkflowHandlerList,
} from "@llamaindex/ui";
import type { TypedAgentData } from "llama-cloud-services/beta/agent";
import styles from "./HomePage.module.css";
import { useNavigate } from "react-router-dom";
import { useEffect, useState } from "react";
/**
 * Home page. Renders the task list keyed by `taskKey`, so the list is
 * remounted whenever that key changes.
 */
export default function HomePage() {
  const { taskKey } = taskCompletedState();
  return <TaskList key={taskKey} />;
}
/**
 * Returns a key that increments when a task is completed, can be used to force a re-render of the task list.
 *
 * A task counts as completed when the number of "process-file" handlers with
 * status "running" drops below the previously recorded count.
 *
 * NOTE(review): this function calls hooks, so by React convention it should
 * be named `use…`; renaming it also requires updating its caller.
 */
function taskCompletedState() {
  const { handlers } = useWorkflowHandlerList("process-file");
  const runningTasks = handlers.filter(
    (handler) => handler.status === "running",
  );
  const runningNow = runningTasks.length;
  const [runningTaskCount, setRunningTaskCount] = useState(runningNow);
  const [taskKey, setTaskKey] = useState(0);

  useEffect(() => {
    if (runningNow < runningTaskCount) {
      // forcefully reload task list after a task is completed; the functional
      // update avoids depending on a captured `taskKey`
      setTaskKey((previousKey) => previousKey + 1);
    }
    // Setting the same count again is a no-op, so the extra effect run that
    // follows this update does not loop.
    setRunningTaskCount(runningNow);
  }, [runningNow, runningTaskCount]);

  return { runningTaskCount, taskKey };
}
/**
 * Main list view: item counters, the trigger for the "process-file"
 * workflow, its progress bar, and the grid of extracted items. Clicking a
 * row navigates to `/item/<id>`.
 *
 * NOTE(review): `styles.page` is referenced below, but the CSS module shown
 * with this file defines no `.page` rule; confirm whether it is needed.
 */
function TaskList() {
  const navigate = useNavigate();
  const goToItem = (item: TypedAgentData) => {
    navigate(`/item/${item.id}`);
  };
  return (
    <div className={styles.page}>
      <main className={styles.main}>
        <div className={styles.grid}>
          <ItemCount title="Total Items" />
          <ItemCount
            title="Reviewed"
            filter={{
              status: { eq: "approved" },
            }}
          />
          <ItemCount
            title="Needs Review"
            filter={{
              status: { eq: "pending_review" },
            }}
          />
        </div>
        <div className={styles.commandBar}>
          <WorkflowTrigger
            workflowName="process-file"
            customWorkflowInput={(files) => {
              // Only the first file is passed to the workflow.
              return {
                file_id: files[0].fileId,
              };
            }}
          />
        </div>
        <WorkflowProgressBar
          className={styles.progressBar}
          workflowName="process-file"
        />
        <ExtractedDataItemGrid
          onRowClick={goToItem}
          builtInColumns={{
            fileName: true,
            status: true,
            createdAt: true,
            itemsToReview: true,
            actions: true,
          }}
        />
      </main>
    </div>
  );
}
-138
View File
@@ -1,138 +0,0 @@
import { useEffect, useState } from "react";
import {
AcceptReject,
ExtractedDataDisplay,
FilePreview,
useItemData,
type Highlight,
} from "@llamaindex/ui";
import { Clock, XCircle } from "lucide-react";
import { useParams } from "react-router-dom";
import type { MySchema } from "../schemas/MySchema";
import MyJsonSchema from "../schemas/MySchema.json" with { type: "json" };
import { useToolbar } from "@/lib/ToolbarContext";
import { useNavigate } from "react-router-dom";
import { modifyJsonSchema } from "@llamaindex/ui/lib";
import { APP_TITLE } from "@/lib/config";
/**
 * Review page for a single extracted item (`/item/:itemId`).
 *
 * Shows the file preview on the left and the editable extracted data on the
 * right. While mounted it adds the file name to the breadcrumbs and places
 * accept/reject controls in the toolbar; both are reset on unmount.
 */
export default function ItemPage() {
  const { itemId } = useParams<{ itemId: string }>();
  const { setButtons, setBreadcrumbs } = useToolbar();
  const [highlight, setHighlight] = useState<Highlight | undefined>(undefined);

  // Use the hook to fetch item data
  const itemHookData = useItemData<MySchema>({
    // order/remove fields as needed here
    jsonSchema: modifyJsonSchema(MyJsonSchema as any, {}),
    itemId: itemId as string,
    isMock: false,
  });
  const navigate = useNavigate();

  // Update breadcrumb when item data loads
  useEffect(() => {
    const fileName = itemHookData.item?.data?.file_name;
    if (fileName) {
      setBreadcrumbs([
        { label: APP_TITLE, href: "/" },
        {
          label: fileName,
          isCurrentPage: true,
        },
      ]);
    }
    return () => {
      // Reset to default breadcrumb when leaving the page
      setBreadcrumbs([{ label: APP_TITLE, href: "/" }]);
    };
  }, [itemHookData.item?.data?.file_name, setBreadcrumbs]);

  // Install the accept/reject controls in the toolbar.
  useEffect(() => {
    setButtons(() => [
      // The toolbar renders this array directly, so the element needs a key.
      <div key="accept-reject" className="ml-auto flex items-center">
        <AcceptReject<MySchema>
          itemData={itemHookData}
          onComplete={() => navigate("/")}
        />
      </div>,
    ]);
    return () => {
      setButtons(() => []);
    };
  }, [itemHookData.data, setButtons]);

  const {
    item: itemData,
    updateData,
    loading: isLoading,
    error,
  } = itemHookData;

  if (isLoading) {
    return (
      <div className="flex h-screen items-center justify-center">
        <div className="text-center">
          <Clock className="h-8 w-8 animate-spin mx-auto mb-2" />
          <div className="text-sm text-gray-500">Loading item...</div>
        </div>
      </div>
    );
  }

  if (error || !itemData) {
    return (
      <div className="flex h-screen items-center justify-center">
        <div className="text-center">
          <XCircle className="h-8 w-8 text-red-500 mx-auto mb-2" />
          <div className="text-sm text-gray-500">
            Error loading item: {error || "Item not found"}
          </div>
        </div>
      </div>
    );
  }

  return (
    <div className="flex h-full bg-gray-50">
      {/* Left Side - File Preview */}
      <div className="w-1/2 border-r border-gray-200 bg-white">
        {itemData.data.file_id && (
          <FilePreview
            fileId={itemData.data.file_id}
            onBoundingBoxClick={(box, pageNumber) => {
              console.log("Bounding box clicked:", box, "on page:", pageNumber);
            }}
            highlight={highlight}
          />
        )}
      </div>

      {/* Right Side - Review Panel */}
      <div className="flex-1 bg-white h-full overflow-y-auto">
        <div className="p-4 space-y-4">
          {/* Extracted Data */}
          <ExtractedDataDisplay<MySchema>
            extractedData={itemData.data}
            title="Extracted Data"
            onChange={(updatedData) => {
              updateData(updatedData);
            }}
            onClickField={(args) => {
              // TODO: set multiple highlights
              // Only the page comes from the citation; the box itself is a
              // fixed placeholder.
              setHighlight({
                page: args.metadata?.citation?.[0]?.page ?? 1,
                x: 100,
                y: 100,
                width: 0,
                height: 0,
              });
            }}
            jsonSchema={itemHookData.jsonSchema}
          />
        </div>
      </div>
    </div>
  );
}
-12
View File
@@ -1,12 +0,0 @@
/* eslint-disable */
/**
* This file was automatically generated by json-schema-to-typescript.
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
* and run json-schema-to-typescript to regenerate this file.
*/
export type Hello = string;
export interface MySchema {
hello: Hello;
}
-11
View File
@@ -1,11 +0,0 @@
{
"properties": {
"hello": {
"title": "Hello",
"type": "string"
}
},
"required": ["hello"],
"title": "MySchema",
"type": "object"
}
-15
View File
@@ -1,15 +0,0 @@
/// <reference types="vite/client" />
/**
 * Typed view of the `import.meta.env` values this app reads.
 */
interface ImportMetaEnv {
  readonly VITE_LLAMA_CLOUD_API_KEY?: string;
  readonly VITE_LLAMA_CLOUD_BASE_URL?: string;

  // injected from llama_deploy
  readonly VITE_LLAMA_DEPLOY_BASE_PATH: string;
  // Key actually defined by vite.config.ts for the deployment base path;
  // optional because the underlying environment variable may be unset.
  readonly VITE_LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH?: string;
  readonly VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME: string;
  // NOTE(review): vite.config.ts only defines this when a project id is set,
  // so it can be undefined at runtime; callers should guard before use.
  readonly VITE_LLAMA_DEPLOY_PROJECT_ID: string;
}
/** Types `import.meta.env` with the app-specific `ImportMetaEnv` declared above. */
interface ImportMeta {
  readonly env: ImportMetaEnv;
}
-31
View File
@@ -1,31 +0,0 @@
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
/* Path mapping */
"baseUrl": ".",
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["src", "vite.config.ts", "src/vite-env.d.ts"],
"exclude": ["node_modules"]
}
-43
View File
@@ -1,43 +0,0 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
import path from "path";
// https://vitejs.dev/config/
/**
 * Vite configuration. Reads deployment settings from the process environment
 * and exposes a subset of them to client code via `define`.
 *
 * The dev server port comes from `PORT`; when `PORT` is unset or is not a
 * valid port number, 3000 is used.
 */
export default defineConfig(() => {
  const deploymentName = process.env.LLAMA_DEPLOY_DEPLOYMENT_NAME;
  const basePath = process.env.LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH;
  const projectId = process.env.LLAMA_DEPLOY_PROJECT_ID;
  const baseUrl = process.env.LLAMA_CLOUD_BASE_URL;

  // Fall back to 3000 rather than handing NaN or an out-of-range value to the server.
  const requestedPort = process.env.PORT ? Number(process.env.PORT) : 3000;
  const port =
    Number.isInteger(requestedPort) &&
    requestedPort >= 0 &&
    requestedPort <= 65535
      ? requestedPort
      : 3000;

  return {
    plugins: [react()],
    resolve: {
      alias: {
        "@": path.resolve(__dirname, "./src"),
      },
    },
    server: {
      port: port,
      host: true,
    },
    build: {
      outDir: "dist",
      sourcemap: true,
    },
    base: basePath,
    define: {
      // Primary define uses NAME
      "import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME": JSON.stringify(
        deploymentName
      ),
      "import.meta.env.VITE_LLAMA_DEPLOY_DEPLOYMENT_BASE_PATH": JSON.stringify(basePath),
      // The two defines below are only added when the value is set.
      ...(projectId && {
        "import.meta.env.VITE_LLAMA_DEPLOY_PROJECT_ID":
          JSON.stringify(projectId),
      }),
      ...(baseUrl && {
        "import.meta.env.VITE_LLAMA_CLOUD_BASE_URL": JSON.stringify(baseUrl),
      }),
    },
  };
});