diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..3da3625 --- /dev/null +++ b/.env.template @@ -0,0 +1,21 @@ +# LlamaCloud API Configuration +# Get these values from https://cloud.llamaindex.ai/ + +# Your LlamaCloud API key (required) +LLAMA_CLOUD_API_KEY=your_api_key_here + +# Your LlamaCloud project ID (required) +LLAMA_CLOUD_PROJECT_ID=your_project_id_here + +# Your LlamaCloud organization ID (required) +LLAMA_CLOUD_ORGANIZATION_ID=your_organization_id_here + +# Name for your extraction agent (optional - defaults to "invoice_extraction_agent") +LLAMA_CLOUD_AGENT_NAME=invoice_extraction_agent + +# Instructions: +# 1. Copy this file to .env: cp .env.template .env +# 2. Replace the placeholder values with your actual LlamaCloud credentials +# 3. Save the .env file +# 4. Run: python create_agent.py +# 5. Run: python sample.py \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d3c1754 --- /dev/null +++ b/.gitignore @@ -0,0 +1,83 @@ +# Environment variables +.env +.env.local +.env.production + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ +.env/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Streamlit +.streamlit/ + +# Temporary files and development folders +tmp_*/ +temp/ +*.tmp +*.log + +# Node modules (for any JS tooling) +node_modules/ +*.log +npm-debug.log* + +# Next.js (excluding from repo) +nextjs_app/ +nextjs_app2/ + +# Coverage reports +htmlcov/ +.coverage +.coverage.* +coverage.xml +*.cover + +# Pytest +.pytest_cache/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json \ No newline at end of file diff --git a/README.md b/README.md index e66d3f3..d02476c 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,177 @@ -# Invoice Extraction Streamlit App +# Invoice Processing App - Vibe Coding Example -A simple Streamlit application that allows users to upload invoice images and extract structured data using LlamaCloud's extraction agent. +This repository demonstrates how to "vibe code" a full-stack invoice processing application using Cursor/Claude Code from a simple starting file. This showcases building a complete Streamlit application with LlamaCloud's document extraction capabilities. -## Features +## šŸš€ Quick Start - Vibe Coding Your Own App -- šŸ“„ Upload invoice images (JPG, JPEG, PNG, BMP, TIFF) -- šŸ” Automatic data extraction using LlamaCloud's kaggle_invoice_agent -- šŸ“Š Display extracted data in both JSON and formatted views -- šŸŽØ Clean, modern UI with progress indicators +This repository demonstrates **vibe coding** - rapidly building applications through iterative AI-assisted development. The goal is to transform a simple script into a full-featured application using Cursor/Claude Code. -## Setup +### Prerequisites +- Python 3.11+ +- LlamaCloud account and API key ([Get one here](https://cloud.llamaindex.ai/)) -1. **Install dependencies:** - ```bash - pip install -r requirements.txt - ``` +### Setup Process -2. 
**Set up environment variables:** - Create a `.env` file in the project root with your LlamaCloud API credentials: - ``` - LLAMA_CLOUD_API_KEY=your_api_key_here - ``` - -3. **Run the app:** - ```bash - streamlit run app.py - ``` - -4. **Open your browser:** - The app will be available at `http://localhost:8501` - -## Usage - -1. Upload an invoice image using the file uploader -2. Click "Extract Data" to run the extraction -3. View the extracted data in both JSON and formatted formats -4. The sidebar shows configuration details and app information - -## Configuration - -The app uses the following configuration (from `sample.py`): -- **Project ID:** `2fef999e-1073-40e6-aeb3-1f3c0e64d99b` -- **Organization ID:** `43b88c8f-e488-46f6-9013-698e3d2e374a` -- **Agent:** `kaggle_invoice_agent` - -## File Structure - -``` -jerry_invoice_streamlit/ -├── app.py # Main Streamlit application -├── sample.py # Original sample code -├── requirements.txt # Python dependencies -├── README.md # This file -└── .env # Environment variables (create this) +#### 1. **Clone and Install** +```bash +git clone <repository-url> +cd invoice-extraction-vibe-coding +pip install -r requirements.txt ``` -## Troubleshooting +#### 2. **Set Up LlamaCloud** +1. Go to [LlamaCloud](https://cloud.llamaindex.ai/) and log in +2. Create a new project and note your `project_id` and `organization_id` +3. Get your API key from your account settings -- **API Key Issues:** Make sure your `.env` file contains the correct `LLAMA_CLOUD_API_KEY` -- **Import Errors:** Ensure all dependencies are installed with `pip install -r requirements.txt` -- **File Upload Issues:** Check that your image file is in a supported format \ No newline at end of file +#### 3.
**Configure Environment** +Copy the template and add your credentials: +```bash +cp .env.template .env +``` +Then edit `.env` with your LlamaCloud credentials: +```env +LLAMA_CLOUD_API_KEY=your_actual_api_key +LLAMA_CLOUD_PROJECT_ID=your_actual_project_id +LLAMA_CLOUD_ORGANIZATION_ID=your_actual_organization_id +LLAMA_CLOUD_AGENT_NAME=invoice_extraction_agent +``` + +#### 4. **Create Extraction Agent** +```bash +python create_agent.py +``` +This automatically creates an extraction agent in LlamaCloud using the invoice schema from `sample_data/sample_schema.py`. + +#### 5. **Test Your Setup** +```bash +python sample.py +``` +This validates your configuration and runs extraction on the sample invoice. You should see structured JSON output. + +#### 6. **Start Vibe Coding!** +Now comes the fun part - use the `cursor_prompt.md` template to transform `sample.py` into a full application: + +1. Open your project in Cursor or Claude Code +2. Copy the prompt from `cursor_prompt.md` +3. Start iterating: "Transform this simple script into a professional Streamlit app with..." +4. Build features incrementally through natural language prompts + +#### 7. **Compare Your Result** +When you're done vibe coding, compare your creation with our `app.py` to see different approaches! 
+ +## 📁 Project Structure + +``` +invoice-extraction-vibe-coding/ +├── app.py # Full-featured Streamlit application (generated) +├── sample.py # Starting point - simple extraction script +├── create_agent.py # Script to create LlamaCloud extraction agent +├── .env.template # Environment variables template +├── requirements.txt # Python dependencies +├── cursor_prompt.md # Cursor prompt template for vibe coding +└── sample_data/ # Sample invoice data + ├── batch1-0274.jpg # Sample invoice image + └── sample_schema.py # Pydantic models for data structure +``` + +## 🎯 What This Demonstrates + +### From Simple Script to Full App +- **Starting Point**: `sample.py` - A basic 20-line script that calls LlamaCloud extraction +- **End Result**: `app.py` - A fully-featured Streamlit application with: + - Professional UI with custom CSS + - File upload functionality + - Real-time data extraction + - Structured data display + - Invoice history tracking + - Status indicators and metrics + +### LlamaCloud Integration +- Document parsing and extraction using LlamaExtract +- Structured data output using predefined schemas +- Integration with the extraction agent named by `LLAMA_CLOUD_AGENT_NAME` (default: `invoice_extraction_agent`) + +## 🛠 Vibe Coding with the Prompt Template + +This project demonstrates **vibe coding** - rapidly building applications through conversational AI development. + +### Start with `cursor_prompt.md` +The `cursor_prompt.md` file contains a comprehensive prompt template that can potentially **one-shot** the entire application transformation: + +1. **Copy the main prompt** from `cursor_prompt.md` +2. **Paste it into Cursor/Claude Code** with your `sample.py` file open +3. **Watch the magic happen** - the AI may build the entire Streamlit app in one go!
+ +### The Vibe Coding Process: +- **Start Simple**: `sample.py` - a working 20-line script +- **Use Natural Language**: Describe what you want, not how to build it +- **Iterate if Needed**: Add features through follow-up prompts +- **Build Incrementally**: Test and refine each addition + +### Why the Template Works: +- **Comprehensive Scope**: Covers UI, functionality, and user experience +- **Clear Context**: References your actual files and sample data +- **Specific Examples**: Shows exactly what features to build +- **Production-Ready**: Asks for professional-quality output + +Try the full prompt first - you might be surprised how much gets built in a single interaction! + +## šŸ”§ Features + +### Current Application (`app.py`) +- šŸ“„ **Multi-format Support**: JPG, JPEG, PNG, BMP, TIFF +- šŸŽØ **Professional UI**: Custom CSS styling and responsive design +- šŸ“Š **Data Visualization**: Structured invoice data display +- šŸ“ˆ **Analytics**: Processing metrics and status tracking +- šŸ”„ **Real-time Processing**: Live extraction with progress indicators +- šŸ’¾ **Session Storage**: Invoice history within session + +### Data Schema +The application extracts structured invoice data including: +- Invoice metadata (number, date) +- Seller and client information +- Line items with pricing details +- VAT calculations and summaries +- Total amounts + +See `sample_data/sample_schema.py` for the complete Pydantic model definitions. + +## šŸ“ Usage + +1. **Upload Invoice**: Drag and drop or select an invoice image +2. **Extract Data**: Click the extraction button to process with LlamaCloud +3. **View Results**: See structured data in formatted tables and JSON +4. 
**Track History**: View processed invoices in the history tab + +## šŸ”‘ Configuration + +Update these values in your application for your own LlamaCloud setup: +```python +project_id = "your-project-id" +organization_id = "your-organization-id" +agent_name = "your-agent-name" +``` + +## šŸ› Troubleshooting + +### Common Issues +- **API Key**: Ensure your LlamaCloud API key is valid and in `.env` +- **Dependencies**: Run `pip install -r requirements.txt` +- **File Formats**: Only image formats are supported +- **File Size**: Keep images under 10MB for best performance + +## šŸ¤ Contributing + +This is an educational example demonstrating the vibe coding approach. Feel free to: +- Fork and modify for your use case +- Share improvements and variations +- Use as a starting point for your own projects + +## šŸ“„ License + +Open source - use freely for learning and development. + +--- + +**Built with ā¤ļø using LlamaIndex, LlamaCloud, and the power of vibe coding with Cursor/Claude Code!** \ No newline at end of file diff --git a/create_agent.py b/create_agent.py new file mode 100644 index 0000000..ac1ad5f --- /dev/null +++ b/create_agent.py @@ -0,0 +1,126 @@ +""" +Create LlamaCloud extraction agent for invoice processing. + +Run this script to automatically create an extraction agent with the invoice schema +before testing with sample.py or building your Streamlit app. 
+""" + +import os +import sys +from dotenv import load_dotenv +from llama_cloud_services import LlamaExtract +from llama_cloud.core.api_error import ApiError +from sample_data.sample_schema import Invoice + +# Load environment variables +load_dotenv() + +def create_invoice_agent(): + """Create or update the invoice extraction agent.""" + + # Get configuration + project_id = os.getenv("LLAMA_CLOUD_PROJECT_ID") + organization_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID") + agent_name = os.getenv("LLAMA_CLOUD_AGENT_NAME", "invoice_extraction_agent") + + # Validate configuration + if not project_id or project_id == "your-project-id-here": + print("āŒ Error: LLAMA_CLOUD_PROJECT_ID not configured!") + print("Please set your project ID in the .env file") + return False + + if not organization_id or organization_id == "your-organization-id-here": + print("āŒ Error: LLAMA_CLOUD_ORGANIZATION_ID not configured!") + print("Please set your organization ID in the .env file") + return False + + try: + print("šŸš€ Initializing LlamaCloud extraction service...") + + # Initialize LlamaExtract + extract = LlamaExtract( + show_progress=True, + check_interval=5, + project_id=project_id, + organization_id=organization_id + ) + + print(f"āœ… Connected to LlamaCloud") + print(f"šŸ“‹ Project ID: {project_id}") + print(f"šŸ¢ Organization ID: {organization_id}") + print(f"šŸ¤– Agent name: {agent_name}") + + # Check if agent already exists + try: + existing_agent = extract.get_agent(name=agent_name) + if existing_agent: + print(f"āœ… Agent '{agent_name}' already exists!") + print(f"šŸ¤– Agent ID: {existing_agent.id}") + print("šŸ“ You can use this existing agent for extraction.") + return True + except ApiError as e: + if e.status_code == 404: + print(f"šŸ“ Agent '{agent_name}' does not exist, creating new one...") + else: + raise + + # Create new agent with invoice schema + print("šŸ”§ Creating new extraction agent...") + print("šŸ“Š Using invoice schema from 
sample_data/sample_schema.py") + + agent = extract.create_agent( + name=agent_name, + data_schema=Invoice + ) + + print("šŸŽ‰ Success! Extraction agent created successfully!") + print(f"šŸ¤– Agent ID: {agent.id}") + print(f"šŸ“ Agent Name: {agent.name}") + + print("\nšŸ“‹ Schema Summary:") + print(" • Invoice metadata (number, date)") + print(" • Seller and client information") + print(" • Line items with pricing details") + print(" • VAT calculations and summaries") + print(" • Total amounts") + + print(f"\nāœ… Setup complete! Your agent '{agent_name}' is ready to use.") + print("\nšŸš€ Next steps:") + print("1. Run 'python sample.py' to test extraction") + print("2. Use 'cursor_prompt.md' to build your Streamlit app") + + return True + + except Exception as e: + print(f"āŒ Error creating agent: {e}") + print("\nšŸ”§ Troubleshooting:") + print("1. Check your .env file has correct credentials") + print("2. Verify your LlamaCloud account has necessary permissions") + print("3. Ensure you have a valid project and organization setup") + return False + +def main(): + """Main function.""" + print("šŸ”Ø LlamaCloud Invoice Extraction Agent Setup") + print("=" * 50) + + # Check if .env file exists + if not os.path.exists(".env"): + print("āŒ No .env file found!") + print("\nPlease create a .env file with:") + print("LLAMA_CLOUD_API_KEY=your_api_key_here") + print("LLAMA_CLOUD_PROJECT_ID=your_project_id_here") + print("LLAMA_CLOUD_ORGANIZATION_ID=your_organization_id_here") + print("LLAMA_CLOUD_AGENT_NAME=invoice_extraction_agent") + sys.exit(1) + + # Create agent + success = create_invoice_agent() + + if success: + sys.exit(0) + else: + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cursor_prompt.md b/cursor_prompt.md new file mode 100644 index 0000000..265dcea --- /dev/null +++ b/cursor_prompt.md @@ -0,0 +1,111 @@ +# Cursor Prompt for Invoice Processing App + +Use this prompt to transform the simple `sample.py` script into a 
full-featured Streamlit invoice processing application. + +## Main Prompt + +``` +I have a simple piece of sample code (sample.py) that initializes a LlamaCloud extraction agent and runs extraction over a sample invoice image. I want to transform this into a professional Streamlit web application. + +Here's what I'm starting with: +- sample.py: Basic extraction script using LlamaExtract with the agent named in LLAMA_CLOUD_AGENT_NAME +- sample_data/batch1-0274.jpg: Sample invoice image for testing +- sample_data/sample_schema.py: Pydantic models defining the expected invoice data structure + +Transform this into a comprehensive Streamlit app with: + +## Core Features +1. **File Upload Interface** + - Support multiple document formats: + - PDF files (.pdf) + - Word documents (.docx) + - Image files (.jpg, .jpeg, .png, .bmp, .tiff) + - And other common document formats supported by LlamaCloud + - Drag-and-drop functionality + - Document preview before processing + - File validation and size limits + +2. **Data Extraction & Display** + - Use the same LlamaExtract agent from sample.py + - Process uploaded invoices in real-time + - Display extracted data using the schema structure from sample_schema.py + - Show both formatted view and raw JSON + +3.
**Professional UI Design** + - Modern, clean interface with custom CSS + - Responsive layout with proper spacing + - Professional color scheme and typography + - Loading states and progress indicators + +## Data Presentation +Based on the sample_schema.py structure, display: +- Invoice overview (number, date, status) +- Seller and client information in organized cards +- Line items in a structured table +- VAT summary with calculations +- Total amounts prominently displayed + +## Additional Features +- Session-based invoice history +- Processing status and metrics in sidebar +- Error handling with user-friendly messages +- System status indicators +- Usage statistics and tips + +## Technical Requirements +- Use the same project_id and organization_id from sample.py +- Keep using the extraction agent that sample.py loads via LLAMA_CLOUD_AGENT_NAME +- Include proper environment variable handling for API keys +- Add comprehensive error handling for API failures + +Make it production-ready with clean code structure, proper documentation, and professional presentation. The app should feel like a commercial invoice processing tool while maintaining the simplicity of the original extraction logic. + +Test with the provided sample_data/batch1-0274.jpg to ensure everything works correctly.
+``` + +## Follow-up Enhancement Prompts + +### UI Polish +``` +Enhance the visual design with: +- Custom CSS for a more professional look +- Better color scheme (consider invoice/business themes) +- Improved typography and spacing +- Status badges and progress indicators +- Responsive design for different screen sizes +``` + +### Feature Expansion +``` +Add these capabilities: +- Export results to JSON/CSV +- Batch processing of multiple invoices +- Invoice data validation against schema +- Search and filter processed invoices +- Comparison between different invoices +``` + +### User Experience +``` +Improve the user experience with: +- Better error messages and recovery options +- Helpful tooltips and guidance +- Keyboard shortcuts and accessibility +- Auto-save of processing results +- Undo/redo functionality where applicable +``` + +## Key Context Files + +When using this prompt, make sure to include: +- `sample.py` - The starting extraction script +- `sample_data/sample_schema.py` - Pydantic models for data structure +- `sample_data/batch1-0274.jpg` - Test invoice image +- Current `requirements.txt` - Dependencies list + +## Expected Transformation + +**From:** 20-line extraction script +**To:** Full-featured web application with professional UI, comprehensive functionality, file handling, data visualization, and production-ready code quality. + +The result should demonstrate the power of "vibe coding" - rapidly building complex applications through iterative AI-assisted development. 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5202220..5e8ebda 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ streamlit>=1.28.0 -python-dotenv>=1.0.0 -llama-cloud-services>=0.1.0 -llama-cloud>=0.1.0 \ No newline at end of file +python-dotenv>=1.1.1 +llama-cloud-services==0.6.49 +llama-cloud==0.1.34 \ No newline at end of file diff --git a/sample.py b/sample.py index 82d97bb..362a33c 100644 --- a/sample.py +++ b/sample.py @@ -1,22 +1,83 @@ +""" +Simple invoice extraction script using LlamaCloud. + +This is the starting point for vibe coding a full Streamlit application. +Configure your LlamaCloud credentials and run this script to test extraction +before building the full web application. +""" + +import os +import json from dotenv import load_dotenv from llama_cloud_services import LlamaExtract from llama_cloud.core.api_error import ApiError +# Load environment variables +load_dotenv() -project_id = "2fef999e-1073-40e6-aeb3-1f3c0e64d99b" -organization_id = "43b88c8f-e488-46f6-9013-698e3d2e374a" +# Configuration - Update these with your LlamaCloud details +PROJECT_ID = os.getenv("LLAMA_CLOUD_PROJECT_ID", "your-project-id-here") +ORGANIZATION_ID = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID", "your-organization-id-here") +AGENT_NAME = os.getenv("LLAMA_CLOUD_AGENT_NAME", "your-agent-name-here") -# Optionally, add your project id/organization id -extract = LlamaExtract( - show_progress=False, - check_interval=5, - project_id=project_id, - organization_id=organization_id -) +# Sample image path +SAMPLE_IMAGE = "sample_data/batch1-0274.jpg" +def main(): + """Run invoice extraction on sample image.""" + + # Validate configuration + if PROJECT_ID == "your-project-id-here" or ORGANIZATION_ID == "your-organization-id-here": + print("āŒ Please configure your LlamaCloud credentials!") + print("Update your .env file with:") + print("LLAMA_CLOUD_PROJECT_ID=your-actual-project-id") + 
print("LLAMA_CLOUD_ORGANIZATION_ID=your-actual-organization-id") + print("LLAMA_CLOUD_AGENT_NAME=your-actual-agent-name") + return + + try: + print("šŸš€ Initializing LlamaCloud extraction agent...") + + # Initialize the extraction service + extract = LlamaExtract( + show_progress=True, # Show progress for better UX + check_interval=5, + project_id=PROJECT_ID, + organization_id=ORGANIZATION_ID + ) + + # Get the configured agent + agent = extract.get_agent(name=AGENT_NAME) + print(f"āœ… Successfully connected to agent: {AGENT_NAME}") + + # Run extraction on sample image + print(f"šŸ“„ Processing sample invoice: {SAMPLE_IMAGE}") + result = agent.extract(SAMPLE_IMAGE) + + # Display results + print("āœ… Extraction completed successfully!") + print("\nšŸ“Š Extracted Data:") + print("=" * 50) + print(json.dumps(result.data, indent=2)) + + # Save results for reference + output_file = "sample_output.json" + with open(output_file, "w") as f: + json.dump(result.data, f, indent=2) + print(f"\nšŸ’¾ Results saved to: {output_file}") + + print("\nšŸŽ‰ Success! You're ready to start vibe coding the Streamlit app!") + print("Use cursor_prompt.md to transform this script into a full application.") + + except ApiError as e: + print(f"āŒ API Error: {e}") + print("Check your API key and agent configuration.") + except FileNotFoundError: + print(f"āŒ Sample image not found: {SAMPLE_IMAGE}") + print("Make sure the sample_data directory exists with the sample image.") + except Exception as e: + print(f"āŒ Unexpected error: {e}") -agent = extract.get_agent(name="kaggle_invoice_agent") - -result = agent.extract("batch1-0274.jpg") -result.data # outputs json of the extracted output according to the schema defined in the agent +if __name__ == "__main__": + main()