mirror of
https://github.com/run-llama/template-workflow-data-extraction.git
synced 2026-06-30 21:38:03 -04:00
new template
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
# questions
|
||||
project_name:
|
||||
type: str
|
||||
help: What is your project name?
|
||||
default: "{{_folder_name}}"
|
||||
validator: |-
|
||||
{% if not project_name.replace("-", "").isalpha() %}
|
||||
Project name must contain only letters and dashes
|
||||
{% endif %}
|
||||
@@ -0,0 +1,22 @@
|
||||
name: "{{project_name}}"
|
||||
|
||||
control-plane:
|
||||
port: 8000
|
||||
|
||||
default-service: loop-files
|
||||
|
||||
services:
|
||||
loop-files:
|
||||
name: Main Workflow
|
||||
source:
|
||||
type: local
|
||||
name: workflow
|
||||
path: workflow/loop_files:workflow
|
||||
python-dependencies:
|
||||
- "llama-cloud-services"
|
||||
|
||||
ui:
|
||||
name: "{{project_name}} UI"
|
||||
source:
|
||||
type: local
|
||||
name: ui
|
||||
@@ -0,0 +1,41 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
@@ -0,0 +1 @@
|
||||
# Next.js Agent App Starter
|
||||
@@ -0,0 +1,9 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
// Workspace package(s) that Next.js is asked to transpile together with this app.
const transpiledWorkspacePackages = ["@llamaindex/agent-app"];

// Next.js configuration object for the UI; add further options here as needed.
const nextConfig: NextConfig = {
  transpilePackages: transpiledWorkspacePackages,
};
|
||||
|
||||
export default nextConfig;
|
||||
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "{{project_name}}-ui",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev --turbopack",
|
||||
"build": "next build --experimental-build-mode compile",
|
||||
"start": "next start",
|
||||
"lint": "tsc --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@llamaindex/agent-app": "workspace:*",
|
||||
"@llamaindex/extracted-data-client": "workspace:*",
|
||||
"@radix-ui/themes": "^3.2.1",
|
||||
"next": "15.3.2",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"zod": "^3.25.30"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"prettier": "^3.5.3",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import {
|
||||
ExtractedDataGrid,
|
||||
zodToJsonSchema,
|
||||
} from "@llamaindex/agent-app/server";
|
||||
// TODO - import your schema from @/schemas/SchemaName
|
||||
import { Placeholder } from "@/schemas/Placeholder";
|
||||
|
||||
// Props received by the file detail page. `params` is a Promise and has to be
// awaited before `fileId` can be read.
type PageProps = {
  params: Promise<{ fileId: string }>;
};
|
||||
|
||||
/**
 * Detail page for one file.
 *
 * Awaits the route params to obtain `fileId`, converts the (placeholder) Zod
 * schema to JSON Schema, and renders an `ExtractedDataGrid` for that file.
 */
export default async function FilePage(props: PageProps) {
  const { fileId } = await props.params;
  // Swap `Placeholder` for your own schema (see the TODO next to its import).
  const jsonSchema = zodToJsonSchema(Placeholder);

  return (
    <div>
      <ExtractedDataGrid fileId={fileId} schema={jsonSchema} />
    </div>
  );
}
|
||||
@@ -0,0 +1,22 @@
|
||||
|
||||
html,
|
||||
body {
|
||||
max-width: 100vw;
|
||||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
body {
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
import type { Metadata } from "next";
|
||||
import { Geist, Geist_Mono } from "next/font/google";
|
||||
import "./globals.css";
|
||||
import { Theme } from "@radix-ui/themes";
|
||||
|
||||
// Geist (sans) font, latin subset, registered under `--font-geist-sans`.
// Arguments are kept as inline literals on purpose.
const geistSans = Geist({
  subsets: ["latin"],
  variable: "--font-geist-sans",
});

// Geist Mono font, latin subset, registered under `--font-geist-mono`.
const geistMono = Geist_Mono({
  subsets: ["latin"],
  variable: "--font-geist-mono",
});
|
||||
|
||||
// Page metadata exported from the root layout.
// `{{project_name}}` is a Copier placeholder filled in when the template is rendered.
export const metadata: Metadata = {
  description: "{{project_name}} Data Review",
  title: "{{project_name}}",
};
|
||||
|
||||
/**
 * Root layout: renders the `<html>`/`<body>` shell, attaches both Geist font
 * variable classes to `<body>`, and wraps the page content in the Radix `Theme`.
 */
export default function RootLayout(
  props: Readonly<{
    children: React.ReactNode;
  }>
) {
  // Both font `variable` class names, space-separated, applied to <body>.
  const fontVariableClasses = `${geistSans.variable} ${geistMono.variable}`;

  return (
    <html lang="en">
      <body className={fontVariableClasses}>
        <Theme>{props.children}</Theme>
      </body>
    </html>
  );
}
|
||||
@@ -0,0 +1,19 @@
|
||||
.page {
|
||||
|
||||
}
|
||||
|
||||
.main {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.grid {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 1rem;
|
||||
margin-bottom: 1rem;
|
||||
& > * {
|
||||
flex: 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import { FileCount, FileGrid } from "@llamaindex/agent-app/server";
|
||||
import styles from "./page.module.css";
|
||||
|
||||
/**
 * Returns the ISO-8601 timestamp one calendar month before `now` (local time).
 *
 * The day of month is clamped to the length of the target month. A bare
 * `setMonth(getMonth() - 1)` overflows on days 29–31 (e.g. 31 March becomes
 * "31 February", which rolls forward into early March) and would silently
 * shorten the look-back window.
 */
function oneMonthBefore(now: Date): string {
  const dayOfMonth = now.getDate();
  const shifted = new Date(now.getTime());
  // Move to the 1st first so changing the month can never overflow.
  shifted.setDate(1);
  shifted.setMonth(shifted.getMonth() - 1);
  // Day 0 of the following month is the last day of the target month.
  const daysInTargetMonth = new Date(
    shifted.getFullYear(),
    shifted.getMonth() + 1,
    0
  ).getDate();
  shifted.setDate(Math.min(dayOfMonth, daysInTargetMonth));
  return shifted.toISOString();
}

/**
 * Landing page: three `FileCount` tiles (all documents, reviewed, needs review)
 * restricted to files created within the last month, followed by a `FileGrid`
 * whose rows link to the `/file` route.
 */
export default function Home() {
  const lastMonth = oneMonthBefore(new Date());

  return (
    <div className={styles.page}>
      <main className={styles.main}>
        <div className={styles.grid}>
          <FileCount
            title="Total Documents"
            filter_fields={{ created_at: { gt: lastMonth } }}
            filter_status_counts={{}}
          />
          <FileCount
            title="Reviewed"
            filter_fields={{ created_at: { gt: lastMonth } }}
            filter_status_counts={{ pending_review: { eq: 0 } }}
          />
          <FileCount
            title="Needs Review"
            filter_fields={{ created_at: { gt: lastMonth } }}
            filter_status_counts={{ pending_review: { gt: 0 } }}
          />
        </div>
        <FileGrid
          fileRoute="/file"
          includeColumns={[
            {
              standardColumn: "file_name",
            },
            {
              standardColumn: "status",
            },
            {
              standardColumn: "synced_at",
            },
            {
              standardColumn: "created_at",
            },
          ]}
        />
      </main>
    </div>
  );
}
|
||||
@@ -0,0 +1,8 @@
|
||||
import { z } from "zod/v4";
|
||||
|
||||
// Field definitions for the placeholder schema; replace with your real fields.
const placeholderFields = {
  name: z.string(),
  age: z.number(),
};

// Runtime Zod schema built from the fields above.
export const Placeholder = z.object(placeholderFields);

// Static type inferred from the schema; shares the `Placeholder` name with the value.
export type Placeholder = z.infer<typeof Placeholder>;
|
||||
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2017",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
# place secrets here
|
||||
EXTRACTED_DATA_TOKEN="{{project_name}}"
|
||||
LLAMA_CLOUD_API_KEY="ADD ME"
|
||||
# these may be required for your API key or depending on the API called
|
||||
LLAMA_CLOUD_PROJECT_ID=""
|
||||
LLAMA_CLOUD_ORG_ID=""
|
||||
@@ -0,0 +1,28 @@
|
||||
import functools
|
||||
import os
|
||||
|
||||
from extracted_data_client import AuthenticatedClient
|
||||
from llama_cloud_services import LlamaExtract
|
||||
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Add getters for clients and environment variables here.
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_extract_api() -> LlamaExtract:
    """Return the shared ``LlamaExtract`` client, configured from the environment.

    The result is memoized by ``functools.lru_cache``, so the environment is
    only read on the first call.

    Environment variables:
        LLAMA_CLOUD_API_KEY: required.
        LLAMA_CLOUD_PROJECT_ID: optional.
        LLAMA_CLOUD_ORG_ID: optional.

    Raises:
        KeyError: if ``LLAMA_CLOUD_API_KEY`` is not set.
    """
    # Treat an optional ID that is unset *or* blank (e.g. a `NAME=""` placeholder
    # line left untouched in the .env file) as "not provided" and pass None
    # instead of forwarding an empty string.
    return LlamaExtract(
        api_key=os.environ["LLAMA_CLOUD_API_KEY"],
        project_id=os.environ.get("LLAMA_CLOUD_PROJECT_ID") or None,
        organization_id=os.environ.get("LLAMA_CLOUD_ORG_ID") or None,
    )
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_extracted_data_client():
    """Return the shared ``AuthenticatedClient`` for the extracted-data API.

    Memoized by ``functools.lru_cache``. The base URL comes from
    ``EXTRACTED_DATA_BASE_URL`` (falling back to ``http://localhost:9182``) and
    the token from ``EXTRACTED_DATA_TOKEN``.

    Raises:
        KeyError: if ``EXTRACTED_DATA_TOKEN`` is not set.
    """
    env = os.environ
    endpoint = env.get("EXTRACTED_DATA_BASE_URL", "http://localhost:9182")
    auth_token = env["EXTRACTED_DATA_TOKEN"]
    return AuthenticatedClient(base_url=endpoint, token=auth_token)
|
||||
@@ -0,0 +1,59 @@
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from llama_index.core.workflow import (
|
||||
Context,
|
||||
Event,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Event carrying one file to process plus the size of the batch it belongs to.
# NOTE(review): not referenced by LoopFiles.start as currently written.
class ProcessFile(Event):
    file_path: str
    total_files: int


# Event reporting the outcome for one file of a batch.
# NOTE(review): not referenced by LoopFiles.start as currently written.
class ProcessFileCompleted(Event):
    total_files: int
    did_succeed: bool
|
||||
|
||||
|
||||
LOOP_SLEEP_TIME = 60
|
||||
|
||||
|
||||
class LoopFiles(Workflow):
    """
    Polling-loop skeleton for processing files.

    Each pass logs a message, waits ``LOOP_SLEEP_TIME`` seconds, and then emits a
    fresh ``StartEvent`` so the ``start`` step runs again. Discovering and
    processing files is not implemented yet; the comments in ``start`` outline
    the intended steps.
    """

    @step
    async def start(self, event: StartEvent, ctx: Context) -> StartEvent | StopEvent:
        # ``StopEvent`` is part of the return annotation, but this body never
        # returns one: the loop only ends when the workflow is stopped externally.
        logger.info("Checking for files to process")

        # Intended outline (still to be implemented):
        #   1. get the files to process
        #   2. check if they have already been processed
        #   3. process the files that haven't been processed
        #   4. sleep a bit
        await asyncio.sleep(LOOP_SLEEP_TIME)

        next_pass = StartEvent()
        return next_pass
|
||||
|
||||
workflow = LoopFiles(timeout=None)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: enable INFO logging and run the module-level
    # `workflow` until it finishes.
    logging.basicConfig(level=logging.INFO)

    async def main():
        # Kept as a coroutine wrapper so `workflow.run()` is invoked from
        # inside the running event loop.
        await workflow.run()

    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards, replacing the manual
    # new_event_loop / set_event_loop / run_until_complete / close sequence.
    asyncio.run(main())
|
||||
@@ -0,0 +1,22 @@
|
||||
import logging
|
||||
|
||||
from llama_index.core.workflow import StartEvent, StopEvent, Workflow, step
|
||||
from llama_index.core.workflow.retry_policy import ConstantDelayRetryPolicy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Start event for ProcessFileWorkflow; carries the path of the file to process.
class FileEvent(StartEvent):
    file_path: str
|
||||
|
||||
|
||||
class ProcessFileWorkflow(Workflow):
    """
    Single-step workflow that handles one file.

    It is started with a ``FileEvent`` (which carries ``file_path``) and ends
    with a ``StopEvent``. The actual extraction logic is still a TODO.
    """

    # The step is configured with a constant-delay retry policy
    # (maximum_attempts=3, delay=10).
    @step(retry_policy=ConstantDelayRetryPolicy(delay=10, maximum_attempts=3))
    async def run_file(self, event: FileEvent) -> StopEvent:
        logger.info(f"Processing file {event.file_path}")
        # TODO: process the file
        finished = StopEvent()
        return finished
|
||||
@@ -0,0 +1,10 @@
|
||||
[project]
|
||||
name = "{{project_name}}-workflow"
|
||||
version = "0.1.0"
|
||||
description = "Extracts data"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"extraction-utils",
|
||||
"llama-cloud-services"
|
||||
]
|
||||
@@ -0,0 +1,6 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Placeholder extraction schema: it declares no fields yet.
# Replace this with the actual schema you want to use for extraction.
class MySchema(BaseModel):
    ...
|
||||
@@ -0,0 +1,2 @@
|
||||
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
|
||||
{{ _copier_answers|to_nice_yaml -}}
|
||||
Reference in New Issue
Block a user