Delete parse.md

Delete index.md
Delete extract.md
2026-07-01 21:44:37 -04:00 · 2026-03-24 19:27:52 -06:00 · 2026-03-24 19:27:41 -06:00 · 2026-03-24 19:27:25 -06:00 · 2026-03-24 19:26:59 -06:00 · 2026-02-16 16:16:15 -08:00
111 changed files with 23386 additions and 35895 deletions
@@ -0,0 +1,162 @@
+name: Extract E2E Tests (every 4 hours)
+
+on:
+  schedule:
+    # Fires at minute 0 of every fourth hour.
+    - cron: "0 */4 * * *"
+  workflow_dispatch:
+    # Allows manual triggering
+    inputs:
+      # Target environment for a manual run; only staging or production can be chosen.
+      environment:
+        description: "Environment to run the tests in"
+        required: false
+        default: staging
+        type: choice
+        options:
+          - staging
+          - production
+      # Opt-in Slack notification for manual runs (off unless ticked).
+      notify_slack:
+        description: "Notify Slack"
+        required: false
+        default: false
+        type: boolean
+  # No inputs are declared for callers, so called runs fall back to the defaults computed in the job.
+  workflow_call:
+
+env:
+  # Tool versions are quoted so they stay strings (e.g. "3.12" is not read as a float).
+  UV_VERSION: "0.7.20"
+  PYTHON_VERSION: "3.12"
+  SLACK_CHANNEL_ID: C078PHNTF44 # Extract channel ID
+  # Absolute path, so steps with a different working-directory read and write the same log file.
+  API_E2E_LOG_PATH: ${{ github.workspace }}/extract-e2e.log
+
+jobs:
+  # Single job definition, expanded into one run per target environment by the matrix below.
+  extract-e2e:
+    name: "Extract E2E Tests (${{ matrix.environment }})"
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    # One active run per workflow + ref + environment; a newer run cancels the one in progress.
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.environment }}
+      cancel-in-progress: true
+    strategy:
+      # Keep the other environment running even if one matrix leg fails.
+      fail-fast: false
+      matrix:
+        # Scheduled runs cover both staging and production. Any other trigger runs a single
+        # environment: the `environment` dispatch input when present, otherwise staging.
+        environment: ${{ github.event_name == 'schedule' && fromJson('["staging", "production"]') || fromJson(format('["{0}"]', github.event.inputs.environment || 'staging')) }}
+    steps:
+      # Resolves the target environment and the Slack flag into step outputs, and exports the
+      # matching LlamaCloud base URL and credentials to later steps through GITHUB_ENV.
+      - name: Set runtime inputs
+        id: runtime
+        # Expression values reach the script as environment variables instead of being pasted
+        # into the script text, so their contents are never parsed as shell syntax.
+        env:
+          TARGET_ENVIRONMENT: ${{ matrix.environment }}
+          # Manual runs use the dispatch input; otherwise only scheduled runs notify Slack.
+          NOTIFY_SLACK: ${{ github.event.inputs.notify_slack || github.event_name == 'schedule' }}
+          PRODUCTION_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
+          PRODUCTION_PROJECT_ID: ${{ secrets.LLAMA_CLOUD_PROJECT_ID }}
+          STAGING_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY_STAGING }}
+          STAGING_PROJECT_ID: ${{ secrets.LLAMA_CLOUD_PROJECT_ID_STAGING }}
+        run: |
+          echo "environment=${TARGET_ENVIRONMENT}" >> "$GITHUB_OUTPUT"
+          echo "notify_slack=${NOTIFY_SLACK}" >> "$GITHUB_OUTPUT"
+
+          # Anything other than "production" is treated as staging.
+          if [ "${TARGET_ENVIRONMENT}" = "production" ]; then
+            echo "LLAMA_CLOUD_BASE_URL=https://api.cloud.llamaindex.ai" >> "$GITHUB_ENV"
+            api_key_secret="${PRODUCTION_API_KEY}"
+            project_id_secret="${PRODUCTION_PROJECT_ID}"
+          else
+            echo "LLAMA_CLOUD_BASE_URL=https://api.staging.llamaindex.ai" >> "$GITHUB_ENV"
+            api_key_secret="${STAGING_API_KEY}"
+            project_id_secret="${STAGING_PROJECT_ID}"
+          fi
+
+          # Only export credentials that are actually configured, so an unset secret does not
+          # define an empty variable for the test run.
+          if [ -n "$api_key_secret" ]; then
+            echo "LLAMA_CLOUD_API_KEY=$api_key_secret" >> "$GITHUB_ENV"
+          fi
+
+          if [ -n "$project_id_secret" ]; then
+            echo "LLAMA_CLOUD_PROJECT_ID=$project_id_secret" >> "$GITHUB_ENV"
+          fi
+
+      # Checks out the repository; fetch-depth 0 requests the full history instead of a shallow clone.
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      # Installs the uv release pinned in the workflow-level env.
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          version: ${{ env.UV_VERSION }}
+
+      # Installs the pinned Python through uv, then pins that version for later uv commands.
+      - name: Set up Python
+        run: uv python install ${{ env.PYTHON_VERSION }} && uv python pin ${{ env.PYTHON_VERSION }}
+
+      # Runs the Extract test suite from the `py` directory and mirrors its output into the log file.
+      - name: Run Extract E2E tests
+        id: extract-tests
+        # A failure here must not stop the job: the summary and Slack steps still need to run.
+        # The "Check test results" step turns the recorded outcome back into a job failure.
+        continue-on-error: true
+        working-directory: py
+        run: |
+          # Without pipefail the pipeline would report tee's exit status and hide pytest failures.
+          set -o pipefail
+          # Remove any log left at this path so the summary step only sees this run's output.
+          rm -f "$API_E2E_LOG_PATH"
+          # The session timeout (1740 s = 29 min) sits just under the job's 30-minute limit.
+          # NOTE(review): -n / --timeout / --session-timeout presumably come from pytest plugins
+          # (xdist, timeout) declared in the project's dependencies; confirm.
+          uv run pytest -v -n 8 --timeout=300 --session-timeout=1740 tests/extract/ 2>&1 | tee "$API_E2E_LOG_PATH"
+
+      # Condenses the pytest log into a short list of failing tests and exposes it as the
+      # `summary` output of this step.
+      - name: Extract pytest failure summary
+        id: failed-tests
+        if: steps.extract-tests.outcome == 'failure' || cancelled()
+        run: |
+          # Run the inline Python parser below and capture everything it prints.
+          summary="$(python3 - <<'PY'
+          import os
+          import re
+          from pathlib import Path
+
+          # The test step writes its combined output to this path.
+          log_path = Path(os.environ["API_E2E_LOG_PATH"])
+          if not log_path.exists():
+              print("Test log not found.")
+              raise SystemExit(0)
+
+          # Undecodable bytes are dropped rather than aborting the summary.
+          lines = log_path.read_text(errors="ignore").splitlines()
+
+          # Find the "short test summary info" section
+          start = None
+          for i, line in enumerate(lines):
+              if line.startswith("=") and "short test summary info" in line:
+                  start = i + 1
+                  break
+
+          if start is None:
+              print("No test summary found.")
+              raise SystemExit(0)
+
+          # Extract just the FAILED/ERROR lines (test name + short reason)
+          failed_tests = []
+          for line in lines[start:]:
+              if line.startswith("="):
+                  break  # End of section
+              if line.startswith("FAILED ") or line.startswith("ERROR "):
+                  # Extract test name and truncate the error message
+                  # The character class stops at the first character outside it (e.g. a space),
+                  # so only the leading test identifier is kept.
+                  match = re.match(r"(FAILED|ERROR) ([\w/:.\[\]_-]+)", line)
+                  if match:
+                      failed_tests.append(f"{match.group(1)}: {match.group(2)}")
+
+          if failed_tests:
+              print("\n".join(failed_tests[:20]))  # Limit to 20 tests max
+          else:
+              print("No failed tests found in summary.")
+          PY
+          )"
+          # Fall back to a generic message if the parser printed nothing.
+          if [ -z "$summary" ]; then
+            summary="Failed test summary not available. Review the full run logs."
+          fi
+          # Multi-line step output syntax: `name<<DELIMITER`, the value, then `DELIMITER`.
+          {
+            printf 'summary<<EOF\n%s\nEOF\n' "$summary"
+          } >> "$GITHUB_OUTPUT"
+
+      # Re-raises the test failure. The test step uses continue-on-error, so without this
+      # explicit check the job would finish green even when tests failed.
+      - name: Check test results
+        if: always()
+        run: |
+          if [ "${{ steps.extract-tests.outcome }}" == "failure" ]; then
+            echo "Extract E2E tests failed"
+            exit 1
+          fi
+
+      # Posts the failure summary to the Extract Slack channel when the job failed or was
+      # cancelled and notifications are enabled for this run.
+      - name: Post to Extract Slack channel
+        id: slack
+        if: (failure() || cancelled()) && steps.runtime.outputs.notify_slack == 'true'
+        uses: slackapi/slack-github-action@v2.1.1
+        with:
+          # v2 of this action takes a Slack API method, a `token` input and a `payload`.
+          # The v1 `channel-id` / `slack-message` inputs and the SLACK_BOT_TOKEN env var
+          # are not read by v2, so they must not be used with this pinned version.
+          method: chat.postMessage
+          token: ${{ secrets.SLACK_BOT_TOKEN }}
+          # The summary is multi-line, so it is JSON-encoded (a valid YAML double-quoted
+          # scalar) instead of being pasted raw, which would break the payload's indentation.
+          # The same fallback text as the summary step is used when no summary was produced
+          # (for example when the job failed before the tests ran).
+          payload: |
+            channel: ${{ env.SLACK_CHANNEL_ID }}
+            text: ":red_circle: *Extract E2E Failed* (${{ steps.runtime.outputs.environment }})"
+            blocks:
+              - type: section
+                text:
+                  type: mrkdwn
+                  text: ":red_circle: *Extract E2E Failed* (${{ steps.runtime.outputs.environment }})"
+              - type: section
+                text:
+                  type: mrkdwn
+                  text: ${{ toJSON(format('```{0}```', steps.failed-tests.outputs.summary || 'Failed test summary not available. Review the full run logs.')) }}
+              - type: section
+                text:
+                  type: mrkdwn
+                  text: "<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>"
@@ -22,7 +22,7 @@ repos:
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
-        exclude: ".*uv.lock"
+        exclude: ".*uv.lock|examples/"
  - repo: https://github.com/psf/black-pre-commit-mirror
    rev: 23.10.1
    hooks:
@@ -4,77 +4,12 @@

 # Llama Cloud Services

-This repository contains the code for hand-written SDKs and clients for interacting with LlamaCloud.
-
-This includes:
-
- [LlamaParse](./parse.md) - A GenAI-native document parser that can parse complex document data for any downstream LLM use case (Agents, RAG, data processing, etc.).
- [LlamaExtract](./extract.md) - A prebuilt agentic data extractor that can be used to transform data into a structured JSON representation.
- [LlamaCloud Index](./index.md) - A widely customizable and fully automated document ingestion pipeline that also serves retrieval purposes.
-
-## Getting Started
-
-Install the package:
-
-```bash
-pip install llama-cloud-services
-```
-
-Then, get your API key from [LlamaCloud](https://cloud.llamaindex.ai/).
-
-Then, you can use the services in your code:
-
-```python
-from llama_cloud_services import (
-    LlamaParse,
-    LlamaExtract,
-    LlamaCloudIndex,
-)
-
-parser = LlamaParse(api_key="YOUR_API_KEY")
-extract = LlamaExtract(api_key="YOUR_API_KEY")
-index = LlamaCloudIndex(
-    "my_first_index", project_name="default", api_key="YOUR_API_KEY"
-)
-```
-
-See the quickstart guides for each service for more information:
-
- [LlamaParse](./parse.md)
- [LlamaExtract](./extract.md)
- [LlamaCloud Index](./index.md)
-
-## Switch to EU SaaS 🇪🇺
-
-If you are interested in using LlamaCloud services in the EU, you can adjust your base URL to `https://api.cloud.eu.llamaindex.ai`.
-
-You can also create your API key in the EU region [here](https://cloud.eu.llamaindex.ai).
-
-```python
-from llama_cloud_services import (
-    LlamaParse,
-    LlamaExtract,
-    EU_BASE_URL,
-)
-
-parser = LlamaParse(api_key="YOUR_API_KEY", base_url=EU_BASE_URL)
-extract = LlamaExtract(api_key="YOUR_API_KEY", base_url=EU_BASE_URL)
-index = LlamaCloudIndex(
-    "my_first_index",
-    project_name="default",
-    api_key="YOUR_API_KEY",
-    base_url=EU_BASE_URL,
-)
-```
-
-## Documentation
-
-You can see complete SDK and API documentation for each service on [our official docs](https://docs.cloud.llamaindex.ai/).
-
-## Terms of Service
-
-See the [Terms of Service Here](./TOS.pdf).
-
-## Get in Touch (LlamaCloud)
-
-You can get in touch with us by following our [contact link](https://www.llamaindex.ai/contact).
+> **⚠️ DEPRECATION NOTICE**
+>
+> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
+>
+> **Please migrate to the new packages:**
+> - **Python**: `pip install "llama-cloud>=1.0"` ([GitHub](https://github.com/run-llama/llama-cloud-py))
+> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
+>
+> The new packages provide the same functionality with improved performance, better support, and active development.
@@ -1,4 +1,14 @@
 # LlamaCloud Services Examples - Python
+> **⚠️ DEPRECATION NOTICE**
+>
+> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
+>
+> **Please migrate to the new packages:**
+> - **Python**: `pip install "llama-cloud>=1.0"` ([GitHub](https://github.com/run-llama/llama-cloud-py))
+> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
+>
+> The new packages provide the same functionality with improved performance, better support, and active development.
+

 In this folder you will find several python notebooks that contain examples regarding:

@@ -0,0 +1 @@
+sample_files/
@@ -0,0 +1,815 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cell-0",
+   "metadata": {},
+   "source": [
+    "# Batch Parse with LlamaCloud Directories\n",
+    "\n",
+    "This notebook demonstrates how to use LlamaCloud's batch processing API to parse multiple files in a directory. The workflow includes:\n",
+    "\n",
+    "1. **Creating a Directory** - Set up a directory to organize your files\n",
+    "2. **Uploading Files** - Upload multiple files to the directory\n",
+    "3. **Starting a Batch Parse Job** - Kick off batch processing on all files\n",
+    "4. **Monitoring Progress** - Check the status and view results\n",
+    "\n",
+    "This is useful when you need to parse many documents at once, as the batch API handles the orchestration and provides progress tracking."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c2b5e1a",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n",
+    ">\n",
+    "> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n",
+    ">\n",
+    "> **Please migrate to:**\n",
+    "> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n",
+    "> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n",
+    ">\n",
+    "> The new package provides the same functionality with improved performance and support."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-1",
+   "metadata": {},
+   "source": [
+    "## Setup and Installation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install llama-cloud python-dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "import httpx\n",
+    "\n",
+    "# Load environment variables\n",
+    "load_dotenv()\n",
+    "\n",
+    "# Set your API key\n",
+    "LLAMA_CLOUD_API_KEY = os.environ.get(\"LLAMA_CLOUD_API_KEY\", \"llx-...\")\n",
+    "\n",
+    "# Optional: Set base URL (defaults to https://api.cloud.llamaindex.ai if not set)\n",
+    "LLAMA_CLOUD_BASE_URL = os.environ.get(\n",
+    "    \"LLAMA_CLOUD_BASE_URL\", \"https://api.cloud.llamaindex.ai\"\n",
+    ")\n",
+    "\n",
+    "# Optional: Set project_id if you have one, otherwise it will use your default project\n",
+    "PROJECT_ID = os.environ.get(\"LLAMA_CLOUD_PROJECT_ID\", None)\n",
+    "\n",
+    "print(\"✅ API key configured\")\n",
+    "print(f\"   Base URL: {LLAMA_CLOUD_BASE_URL}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-4",
+   "metadata": {},
+   "source": [
+    "## Setup HTTP Client\n",
+    "\n",
+    "Since the current version of the llama-cloud SDK has some issues with the beta endpoints, we'll use direct HTTP requests with httpx for reliability."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create HTTP client with authentication\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {LLAMA_CLOUD_API_KEY}\",\n",
+    "}\n",
+    "\n",
+    "print(\"✅ HTTP client configured\")\n",
+    "print(f\"   Using base URL: {LLAMA_CLOUD_BASE_URL}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-6",
+   "metadata": {},
+   "source": [
+    "## Step 1: Create a Directory\n",
+    "\n",
+    "First, we'll create a directory to organize our files. Directories help you group related files together for batch processing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datetime import datetime\n",
+    "\n",
+    "# Create a directory with a timestamp in the name\n",
+    "timestamp = datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
+    "directory_name = f\"batch-parse-demo-{timestamp}\"\n",
+    "\n",
+    "# Create directory using HTTP request\n",
+    "response = httpx.post(\n",
+    "    f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/directories\",\n",
+    "    headers=headers,\n",
+    "    params={\"project_id\": PROJECT_ID},\n",
+    "    json={\n",
+    "        \"name\": directory_name,\n",
+    "        \"description\": \"Demo directory for batch parse example\",\n",
+    "    },\n",
+    "    timeout=60.0,\n",
+    ")\n",
+    "\n",
+    "if response.status_code in [200, 201]:\n",
+    "    directory = response.json()\n",
+    "    directory_id = directory[\"id\"]\n",
+    "    project_id = directory[\"project_id\"]\n",
+    "\n",
+    "    print(f\"✅ Created directory: {directory['name']}\")\n",
+    "    print(f\"   Directory ID: {directory_id}\")\n",
+    "    print(f\"   Project ID: {project_id}\")\n",
+    "else:\n",
+    "    raise Exception(\n",
+    "        f\"Failed to create directory: {response.status_code} - {response.text}\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-8",
+   "metadata": {},
+   "source": [
+    "## Step 2: Upload Files to the Directory\n",
+    "\n",
+    "Now we'll upload some files to our directory. For this demo, we'll download some sample PDFs and upload them.\n",
+    "\n",
+    "You can replace these with your own files."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a directory for sample files\n",
+    "import requests\n",
+    "\n",
+    "os.makedirs(\"sample_files\", exist_ok=True)\n",
+    "\n",
+    "# Sample documents to download\n",
+    "sample_docs = {\n",
+    "    \"attention.pdf\": \"https://arxiv.org/pdf/1706.03762.pdf\",\n",
+    "    \"bert.pdf\": \"https://arxiv.org/pdf/1810.04805.pdf\",\n",
+    "}\n",
+    "\n",
+    "# Download sample documents\n",
+    "for filename, url in sample_docs.items():\n",
+    "    filepath = f\"sample_files/{filename}\"\n",
+    "    if not os.path.exists(filepath):\n",
+    "        print(f\"📥 Downloading {filename}...\")\n",
+    "        response = requests.get(url)\n",
+    "        if response.status_code == 200:\n",
+    "            with open(filepath, \"wb\") as f:\n",
+    "                f.write(response.content)\n",
+    "            print(f\"   ✅ Downloaded {filename}\")\n",
+    "        else:\n",
+    "            print(f\"   ❌ Failed to download {filename}\")\n",
+    "    else:\n",
+    "        print(f\"📁 {filename} already exists\")\n",
+    "\n",
+    "print(\"\\n✅ Sample files ready!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-10",
+   "metadata": {},
+   "source": [
+    "### Upload Files to Directory\n",
+    "\n",
+    "Now let's upload the files to our directory using the `upload_file_to_directory` endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uploaded_files = []\n",
+    "\n",
+    "# Workaround: Use direct HTTP requests instead of SDK due to SDK bug\n",
+    "import httpx\n",
+    "\n",
+    "for filename in os.listdir(\"sample_files\"):\n",
+    "    if filename.endswith(\".pdf\"):\n",
+    "        filepath = f\"sample_files/{filename}\"\n",
+    "\n",
+    "        print(f\"📤 Uploading {filename}...\")\n",
+    "\n",
+    "        # Upload file using direct HTTP request (SDK has a bug with file uploads)\n",
+    "        with open(filepath, \"rb\") as f:\n",
+    "            # Prepare the multipart form data correctly\n",
+    "            files = {\"upload_file\": (filename, f, \"application/pdf\")}\n",
+    "\n",
+    "            # Make the request directly\n",
+    "            response = httpx.post(\n",
+    "                f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/directories/{directory_id}/files/upload\",\n",
+    "                params={\"project_id\": project_id},\n",
+    "                files=files,\n",
+    "                headers={\"Authorization\": f\"Bearer {LLAMA_CLOUD_API_KEY}\"},\n",
+    "                timeout=60.0,\n",
+    "            )\n",
+    "\n",
+    "            if response.status_code in [200, 201]:\n",
+    "                directory_file = response.json()\n",
+    "                uploaded_files.append(directory_file)\n",
+    "                print(f\"   ✅ Uploaded: {directory_file.get('display_name')}\")\n",
+    "                print(f\"      File ID: {directory_file.get('id')}\")\n",
+    "            else:\n",
+    "                print(f\"   ❌ Upload failed: {response.status_code}\")\n",
+    "                print(f\"      Error: {response.text[:200]}\")\n",
+    "\n",
+    "print(f\"\\n✅ Uploaded {len(uploaded_files)} files to directory\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-12",
+   "metadata": {},
+   "source": [
+    "## Step 3: Create a Batch Parse Job\n",
+    "\n",
+    "Now that we have files in our directory, let's create a batch parse job to process them all at once.\n",
+    "\n",
+    "The batch processing API uses the same configuration as LlamaParse."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Configure the parse job\n",
+    "# This configuration will apply to all files in the directory\n",
+    "job_config = {\n",
+    "    \"job_name\": \"parse_raw_file_job\",  # Must match the JobNames enum value\n",
+    "    \"partitions\": {},\n",
+    "    \"parameters\": {\n",
+    "        \"type\": \"parse\",\n",
+    "        \"lang\": \"en\",\n",
+    "        \"fast_mode\": True,\n",
+    "    },\n",
+    "}\n",
+    "\n",
+    "print(\"✅ Job configuration created\")\n",
+    "print(f\"   Language: {job_config['parameters']['lang']}\")\n",
+    "print(f\"   Fast mode: {job_config['parameters']['fast_mode']}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-14",
+   "metadata": {},
+   "source": [
+    "### Submit the Batch Job\n",
+    "\n",
+    "Now let's submit the batch job to process all files in the directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"🚀 Submitting batch parse job for directory: {directory_id}\")\n",
+    "print(f\"   Processing {len(uploaded_files)} files...\\n\")\n",
+    "\n",
+    "# Submit batch job using HTTP request\n",
+    "response = httpx.post(\n",
+    "    f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing\",\n",
+    "    headers=headers,\n",
+    "    params={\"project_id\": project_id},\n",
+    "    json={\n",
+    "        \"directory_id\": directory_id,\n",
+    "        \"job_config\": job_config,\n",
+    "        \"page_size\": 100,  # Number of files to fetch per batch\n",
+    "        \"continue_as_new_threshold\": 10,  # Workflow continuation threshold\n",
+    "    },\n",
+    "    timeout=60.0,\n",
+    ")\n",
+    "\n",
+    "if response.status_code in [200, 201]:\n",
+    "    batch_job = response.json()\n",
+    "    batch_job_id = batch_job[\"id\"]\n",
+    "\n",
+    "    print(\"✅ Batch job submitted successfully!\")\n",
+    "    print(f\"   Batch Job ID: {batch_job_id}\")\n",
+    "    print(f\"   Workflow ID: {batch_job.get('workflow_id')}\")\n",
+    "    print(f\"   Status: {batch_job.get('status')}\")\n",
+    "    print(f\"   Total Items: {batch_job.get('total_items')}\")\n",
+    "else:\n",
+    "    raise Exception(\n",
+    "        f\"Failed to create batch job: {response.status_code} - {response.text}\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-16",
+   "metadata": {},
+   "source": [
+    "## Step 4: Monitor Job Progress\n",
+    "\n",
+    "Now let's monitor the batch job progress. We'll poll the status endpoint to see how the job is progressing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-17",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "\n",
+    "def print_job_status(status_data):\n",
+    "    \"\"\"Helper function to print job status in a readable format.\"\"\"\n",
+    "    job = status_data[\"job\"]\n",
+    "    progress_pct = status_data[\"progress_percentage\"]\n",
+    "\n",
+    "    print(f\"\\n{'='*60}\")\n",
+    "    print(f\"Job Status: {job['status']}\")\n",
+    "    print(f\"{'='*60}\")\n",
+    "    print(f\"Total Items: {job['total_items']}\")\n",
+    "    print(f\"Completed: {job['processed_items']}\")\n",
+    "    print(f\"Failed: {job['failed_items']}\")\n",
+    "    print(f\"Skipped: {job['skipped_items']}\")\n",
+    "    print(f\"Progress: {progress_pct:.1f}%\")\n",
+    "\n",
+    "    if job.get(\"completed_at\"):\n",
+    "        print(f\"Completed At: {job['completed_at']}\")\n",
+    "    elif job.get(\"started_at\"):\n",
+    "        print(f\"Started At: {job['started_at']}\")\n",
+    "\n",
+    "    print(f\"{'='*60}\")\n",
+    "\n",
+    "\n",
+    "# Poll for status updates\n",
+    "print(\"🔄 Monitoring batch job progress...\")\n",
+    "print(\n",
+    "    \"Note: It may take a few seconds for the workflow to initialize and count files.\\n\"\n",
+    ")\n",
+    "\n",
+    "max_polls = 60  # Maximum number of status checks (increased for longer jobs)\n",
+    "poll_interval = 10  # Seconds between checks\n",
+    "\n",
+    "for i in range(max_polls):\n",
+    "    response = httpx.get(\n",
+    "        f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/{batch_job_id}\",\n",
+    "        headers=headers,\n",
+    "        params={\"project_id\": project_id},\n",
+    "        timeout=60.0,\n",
+    "    )\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        status_data = response.json()\n",
+    "        print_job_status(status_data)\n",
+    "\n",
+    "        # Check if job is complete\n",
+    "        job_status = status_data[\"job\"][\"status\"]\n",
+    "        if job_status in [\"completed\", \"failed\", \"cancelled\"]:\n",
+    "            print(f\"\\n✅ Job finished with status: {job_status}\")\n",
+    "            break\n",
+    "\n",
+    "        if i < max_polls - 1:\n",
+    "            print(f\"\\n⏳ Waiting {poll_interval} seconds before next check...\")\n",
+    "            time.sleep(poll_interval)\n",
+    "    else:\n",
+    "        print(f\"Error getting status: {response.status_code} - {response.text}\")\n",
+    "        break\n",
+    "else:\n",
+    "    print(f\"\\n⚠️  Reached maximum polling attempts. Job may still be running.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-18",
+   "metadata": {},
+   "source": [
+    "## Step 5: View Job Items\n",
+    "\n",
+    "Let's look at the individual items in the batch job to see which files were processed successfully."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all items in the batch job\n",
+    "response = httpx.get(\n",
+    "    f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/{batch_job_id}/items\",\n",
+    "    headers=headers,\n",
+    "    params={\"project_id\": project_id, \"limit\": 100},\n",
+    "    timeout=60.0,\n",
+    ")\n",
+    "\n",
+    "if response.status_code == 200:\n",
+    "    items_response = response.json()\n",
+    "\n",
+    "    print(f\"\\n📋 Batch Job Items ({items_response['total_size']} total)\")\n",
+    "    print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "    for item in items_response[\"items\"]:\n",
+    "        status_emoji = (\n",
+    "            \"✅\"\n",
+    "            if item[\"status\"] == \"completed\"\n",
+    "            else \"❌\"\n",
+    "            if item[\"status\"] == \"failed\"\n",
+    "            else \"⏳\"\n",
+    "        )\n",
+    "        print(f\"{status_emoji} {item['item_name']}\")\n",
+    "        print(f\"   Status: {item['status']}\")\n",
+    "        print(f\"   Item ID: {item['item_id']}\")\n",
+    "\n",
+    "        if item.get(\"error_message\"):\n",
+    "            print(f\"   Error: {item['error_message']}\")\n",
+    "\n",
+    "        print()\n",
+    "else:\n",
+    "    print(f\"Error listing items: {response.status_code} - {response.text}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-20",
+   "metadata": {},
+   "source": [
+    "## Step 6: Retrieve Processing Results\n",
+    "\n",
+    "For each completed file, we can retrieve the processing results to see where the parsed output is stored."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get processing results for a specific item\n",
+    "if items_response[\"items\"]:\n",
+    "    first_item = items_response[\"items\"][0]\n",
+    "\n",
+    "    print(f\"\\n🔍 Processing results for: {first_item['item_name']}\")\n",
+    "    print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "    response = httpx.get(\n",
+    "        f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/items/{first_item['item_id']}/processing-results\",\n",
+    "        headers=headers,\n",
+    "        params={\"project_id\": project_id},\n",
+    "        timeout=60.0,\n",
+    "    )\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        results = response.json()\n",
+    "\n",
+    "        print(f\"Item: {results['item_name']}\")\n",
+    "        print(f\"Total processing runs: {len(results['processing_results'])}\\n\")\n",
+    "\n",
+    "        for i, result in enumerate(results[\"processing_results\"], 1):\n",
+    "            print(f\"Run {i}:\")\n",
+    "            print(f\"  Job Type: {result['job_type']}\")\n",
+    "            print(f\"  Processed At: {result['processed_at']}\")\n",
+    "            print(f\"  Parameters Hash: {result['parameters_hash']}\")\n",
+    "\n",
+    "            if result.get(\"output_s3_path\"):\n",
+    "                print(f\"  Output S3 Path: {result['output_s3_path']}\")\n",
+    "\n",
+    "            if result.get(\"output_metadata\"):\n",
+    "                print(f\"  Output Metadata: {result['output_metadata']}\")\n",
+    "\n",
+    "            print()\n",
+    "    else:\n",
+    "        print(f\"Error getting results: {response.status_code} - {response.text}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-22",
+   "metadata": {},
+   "source": [
+    "## Optional: List All Batch Jobs\n",
+    "\n",
+    "You can also list all batch jobs in your project to see the history of batch processing operations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List all parse jobs in the project\n",
+    "response = httpx.get(\n",
+    "    f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing\",\n",
+    "    headers=headers,\n",
+    "    params={\"project_id\": project_id, \"job_type\": \"parse\", \"limit\": 10},\n",
+    "    timeout=60.0,\n",
+    ")\n",
+    "\n",
+    "if response.status_code == 200:\n",
+    "    jobs_response = response.json()\n",
+    "\n",
+    "    print(f\"\\n📊 Recent Batch Parse Jobs ({jobs_response['total_size']} total)\")\n",
+    "    print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "    for job in jobs_response[\"items\"]:\n",
+    "        status_emoji = (\n",
+    "            \"✅\"\n",
+    "            if job[\"status\"] == \"completed\"\n",
+    "            else \"❌\"\n",
+    "            if job[\"status\"] == \"failed\"\n",
+    "            else \"⏳\"\n",
+    "        )\n",
+    "        print(f\"{status_emoji} Job ID: {job['id']}\")\n",
+    "        print(f\"   Status: {job['status']}\")\n",
+    "        print(f\"   Directory: {job['directory_id']}\")\n",
+    "        print(f\"   Total Items: {job['total_items']}\")\n",
+    "        print(f\"   Completed: {job['processed_items']}\")\n",
+    "        print(f\"   Created: {job['created_at']}\")\n",
+    "        print()\n",
+    "else:\n",
+    "    print(f\"Error listing jobs: {response.status_code} - {response.text}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "uug7591rkq",
+   "metadata": {},
+   "source": [
+    "## Step 7: Retrieve Parsed Text Results\n",
+    "\n",
+    "Once the batch job is complete, each BatchJobItem will have a `job_id` field that maps to a parse job ID. We can use this ID with the standard parse client methods to fetch the actual parsed text results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "vpp0vxtc0y",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all completed items and their job IDs\n",
+    "completed_items = [\n",
+    "    item for item in items_response[\"items\"] if item[\"status\"] == \"completed\"\n",
+    "]\n",
+    "\n",
+    "print(f\"📄 Found {len(completed_items)} completed items\\n\")\n",
+    "print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "# Display the job_id for each completed item\n",
+    "for item in completed_items:\n",
+    "    print(f\"📝 {item['item_name']}\")\n",
+    "    print(f\"   Item ID: {item['item_id']}\")\n",
+    "    print(f\"   Parse Job ID: {item['job_id']}\")\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4gck6hwpnl6",
+   "metadata": {},
+   "source": [
+    "### Fetch Parsed Text for a Specific Document\n",
+    "\n",
+    "Now let's use the `job_id` to retrieve the actual parsed text content using the parse client methods."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "g191kvgxxvk",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get the parsed text for the first completed item\n",
+    "if completed_items:\n",
+    "    first_completed = completed_items[0]\n",
+    "\n",
+    "    print(f\"📖 Retrieving parsed text for: {first_completed['item_name']}\")\n",
+    "    print(f\"   Using Parse Job ID: {first_completed['job_id']}\\n\")\n",
+    "    print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "    # Use the job_id to fetch the parse result\n",
+    "    response = httpx.get(\n",
+    "        f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/text\",\n",
+    "        headers=headers,\n",
+    "        params={\"project_id\": project_id},\n",
+    "        timeout=60.0,\n",
+    "    )\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        parse_result = response.text\n",
+    "\n",
+    "        print(f\"✅ Retrieved parsed text ({len(parse_result)} characters)\\n\")\n",
+    "\n",
+    "        # Display first 1000 characters as a preview\n",
+    "        print(\"Preview (first 1000 characters):\")\n",
+    "        print(\"-\" * 80)\n",
+    "        print(parse_result[:1000])\n",
+    "        print(\"-\" * 80)\n",
+    "\n",
+    "        if len(parse_result) > 1000:\n",
+    "            print(f\"\\n... and {len(parse_result) - 1000} more characters\")\n",
+    "    else:\n",
+    "        print(\n",
+    "            f\"Error retrieving parse result: {response.status_code} - {response.text}\"\n",
+    "        )\n",
+    "else:\n",
+    "    print(\"⚠️  No completed items found to retrieve results from\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2olccb4l8fj",
+   "metadata": {},
+   "source": [
+    "### Retrieve Parsed Results in Other Formats\n",
+    "\n",
+    "You can also retrieve the parsed results in JSON or Markdown format by requesting the `result/json` or `result/markdown` endpoint instead."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "lcqsfxiw0sr",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if completed_items:\n",
+    "    first_completed = completed_items[0]\n",
+    "\n",
+    "    print(\n",
+    "        f\"📋 Retrieving parse results in different formats for: {first_completed['item_name']}\\n\"\n",
+    "    )\n",
+    "\n",
+    "    # Get as JSON (includes structured data with pages, images, etc.)\n",
+    "    print(\"1️⃣ Retrieving as JSON...\")\n",
+    "    response = httpx.get(\n",
+    "        f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/json\",\n",
+    "        headers=headers,\n",
+    "        params={\"project_id\": project_id},\n",
+    "        timeout=60.0,\n",
+    "    )\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        json_result = response.json()\n",
+    "        print(f\"   ✅ JSON result with {len(json_result['pages'])} pages\")\n",
+    "        print(f\"      Keys: {list(json_result.keys())}\\n\")\n",
+    "    else:\n",
+    "        print(f\"   Error: {response.status_code}\\n\")\n",
+    "\n",
+    "    # Get as Markdown\n",
+    "    print(\"2️⃣ Retrieving as Markdown...\")\n",
+    "    response = httpx.get(\n",
+    "        f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/markdown\",\n",
+    "        headers=headers,\n",
+    "        params={\"project_id\": project_id},\n",
+    "        timeout=60.0,\n",
+    "    )\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        markdown_result = response.text\n",
+    "        print(f\"   ✅ Markdown result ({len(markdown_result)} characters)\\n\")\n",
+    "\n",
+    "        # Display markdown preview\n",
+    "        print(\"Markdown Preview (first 500 characters):\")\n",
+    "        print(\"-\" * 80)\n",
+    "        print(markdown_result[:500])\n",
+    "        print(\"-\" * 80)\n",
+    "\n",
+    "        if len(markdown_result) > 500:\n",
+    "            print(f\"\\n... and {len(markdown_result) - 500} more characters\")\n",
+    "    else:\n",
+    "        print(f\"   Error: {response.status_code}\")\n",
+    "else:\n",
+    "    print(\"⚠️  No completed items found to retrieve results from\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "lr61wqkfq3",
+   "metadata": {},
+   "source": [
+    "### Batch Process All Parsed Results\n",
+    "\n",
+    "You can also loop through all completed items to retrieve and process all the parsed results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "kltydf9xzkl",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Process all completed items\n",
+    "print(f\"🔄 Processing all {len(completed_items)} completed items...\\n\")\n",
+    "print(f\"{'='*80}\\n\")\n",
+    "\n",
+    "all_results = {}\n",
+    "\n",
+    "for item in completed_items:\n",
+    "    print(f\"📄 Processing: {item['item_name']}\")\n",
+    "    print(f\"   Parse Job ID: {item['job_id']}\")\n",
+    "\n",
+    "    try:\n",
+    "        # Retrieve the parsed text for this item\n",
+    "        response = httpx.get(\n",
+    "            f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{item['job_id']}/result/text\",\n",
+    "            headers=headers,\n",
+    "            params={\"project_id\": project_id},\n",
+    "            timeout=60.0,\n",
+    "        )\n",
+    "\n",
+    "        if response.status_code == 200:\n",
+    "            parsed_text = response.text\n",
+    "\n",
+    "            all_results[item[\"item_name\"]] = {\n",
+    "                \"job_id\": item[\"job_id\"],\n",
+    "                \"text\": parsed_text,\n",
+    "                \"length\": len(parsed_text),\n",
+    "            }\n",
+    "\n",
+    "            print(f\"   ✅ Retrieved {len(parsed_text)} characters\")\n",
+    "        else:\n",
+    "            all_results[item[\"item_name\"]] = {\n",
+    "                \"job_id\": item[\"job_id\"],\n",
+    "                \"error\": f\"HTTP {response.status_code}\",\n",
+    "            }\n",
+    "            print(f\"   ❌ Error: HTTP {response.status_code}\")\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(f\"   ❌ Error: {str(e)}\")\n",
+    "        all_results[item[\"item_name\"]] = {\"job_id\": item[\"job_id\"], \"error\": str(e)}\n",
+    "\n",
+    "    print()\n",
+    "\n",
+    "print(f\"{'='*80}\")\n",
+    "print(f\"\\n✅ Processed {len(all_results)} items\")\n",
+    "print(f\"\\nSummary:\")\n",
+    "for name, result in all_results.items():\n",
+    "    if \"error\" in result:\n",
+    "        print(f\"  ❌ {name}: Error - {result['error']}\")\n",
+    "    else:\n",
+    "        print(f\"  ✅ {name}: {result['length']:,} characters\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -16,6 +16,14 @@
    "![](asset_manager_fund_analysis.png)\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "cbafd7ee",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "cda2e5e9-fe9d-42d9-9387-f529d970ff7b",
@@ -20,6 +20,14 @@
    "This workflow is designed for equity research analysts and investment professionals."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "e7979faf",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -19,6 +19,13 @@
    "The example we go through below is also replicable within Llama Cloud as well, where you will also be able to pick between a number of pre-defined schemas, instead of building your own."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -15,6 +15,13 @@
    "Dow Jones Industrial Average (DJIA) is a stock market index that consists of 30 large companies listed on the New York Stock Exchange and the NASDAQ and is considered a good proxy for the overall US stock market. For this exercise, we will extract the insider transactions for all the companies in the DJIA. Let's first get the list of tickers in the Dow Jones Industrial Average using Wikipedia."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -16,6 +16,14 @@
    "This approach reduces manual data entry, improves extraction accuracy and standardization, and provides traceability for each technical detail."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "8d1efe6e",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "a3b8c8d5-ff3e-48ce-b0b8-29b6b1f517f8",
@@ -11,6 +11,13 @@
    "Take a look at one of the resumes in the `data/resumes` directory. "
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -20,6 +20,14 @@
    "> **Note:** This principle of what fields generalize across your target documents and what might be optional is an important one to keep in mind when designing your schema. \n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "355adfd4",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -21,6 +21,14 @@
    "The following notebook uses the event‑driven syntax (with custom events, steps, and a workflow class) adapted from the technical datasheet and contract review examples."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "ab7be988",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "36d8e34e-ed98-46ac-b744-1642f6e253d5",
@@ -35,6 +35,14 @@
    "📖 For more details, see the [Extraction Target documentation](https://developers.llamaindex.ai/python/cloud/llamaextract/features/concepts/#extraction-target)."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "cb760594",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -31,6 +31,13 @@
    "| Sep-02-2025  | 0.6.62  | Active     |\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -19,6 +19,13 @@
    "The workflow is implemented as a proper LlamaIndex Workflow with separate steps for parsing, classification, and extraction, connected by typed events. This provides modularity, observability, and type safety."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -27,6 +27,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "e2b422f5",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "2e4f707a-c7b5-473f-b4a6-881e2245e82d",
@@ -14,6 +14,13 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -0,0 +1,188 @@
+"""
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+
+Example: Batch Processing a Folder of PDFs with LlamaParse
+
+This script demonstrates how to process multiple PDFs from a folder
+using LlamaParse with controlled concurrency using asyncio and semaphores.
+
+Usage:
+    python batch_parse_folder.py --input-dir ./pdfs --max-concurrent 5
+"""
+
+import asyncio
+import argparse
+from pathlib import Path
+from typing import List, Dict, Any
+from datetime import datetime
+from dotenv import load_dotenv
+import os
+
+from llama_cloud_services import LlamaParse
+
+# Load environment variables from .env file
+load_dotenv()
+
+
+async def parse_single_file(
+    parser: LlamaParse,
+    file_path: Path,
+    semaphore: asyncio.Semaphore,
+) -> Dict[str, Any]:
+    """
+    Parse a single PDF file with concurrency control.
+
+    Args:
+        parser: LlamaParse instance
+        file_path: Path to the PDF file
+        semaphore: Semaphore to control concurrent requests
+
+    Returns:
+        Dictionary with file info and parse result
+    """
+    async with semaphore:
+        try:
+            print(f"Starting parse: {file_path.name}")
+
+            result = await parser.aparse(str(file_path))
+
+            print(f"✓ Completed: {file_path.name} ({len(result.pages)} pages)")
+
+            return {
+                "file": file_path.name,
+                "status": "success",
+                "result": result,
+                "pages": len(result.pages) if result.pages else 0,
+            }
+        except Exception as e:
+            print(f"✗ Error parsing {file_path.name}: {str(e)}")
+            return {
+                "file": file_path.name,
+                "status": "error",
+                "error": str(e),
+            }
+
+
+async def parse_folder(
+    input_dir: Path,
+    max_concurrent: int = 5,
+    api_key: str = None,
+) -> List[Dict[str, any]]:
+    """
+    Parse all PDFs in a folder with controlled concurrency.
+
+    Args:
+        input_dir: Directory containing PDF files
+        max_concurrent: Maximum number of concurrent parse operations
+        api_key: LlamaCloud API key (loaded from .env file)
+
+    Returns:
+        List of parse results for each file
+    """
+    # Find all PDF files
+    pdf_files = list(input_dir.glob("*.pdf"))
+
+    if not pdf_files:
+        print(f"No PDF files found in {input_dir}")
+        return []
+
+    print(f"Found {len(pdf_files)} PDF files to parse")
+
+    # Initialize parser
+    parser = LlamaParse(
+        api_key=api_key,
+        num_workers=1,  # We control concurrency with semaphore
+        show_progress=False,  # We'll show our own progress
+    )
+
+    # Create semaphore to limit concurrent requests
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    # Create tasks for all files
+    tasks = [parse_single_file(parser, pdf_file, semaphore) for pdf_file in pdf_files]
+
+    # Run all tasks concurrently (but limited by semaphore)
+    print(
+        f"Processing {len(tasks)} files with max {max_concurrent} concurrent operations..."
+    )
+    start_time = datetime.now()
+
+    results = await asyncio.gather(*tasks)
+
+    end_time = datetime.now()
+    duration = (end_time - start_time).total_seconds()
+
+    # Process results
+    successful = [
+        r for r in results if isinstance(r, dict) and r.get("status") == "success"
+    ]
+    failed = [r for r in results if isinstance(r, dict) and r.get("status") == "error"]
+
+    # Print summary
+    print("PARSE SUMMARY \n")
+    print(f"Total files: {len(pdf_files)}")
+    print(f"Successful: {len(successful)}")
+    print(f"Failed: {len(failed)}")
+    print(f"Total time: {duration:.2f} seconds")
+    print(f"Average time per file: {duration / len(pdf_files):.2f} seconds")
+
+    if failed:
+        print("\nFailed files:")
+        for result in failed:
+            print(f"  - {result['file']}: {result.get('error', 'Unknown error')}")
+
+    return results
+
+
+def main():
+    """Main entry point for the script."""
+    parser = argparse.ArgumentParser(
+        description="Batch process PDFs in a folder with LlamaParse"
+    )
+    parser.add_argument(
+        "--input-dir",
+        type=str,
+        required=True,
+        help="Directory containing PDF files to parse",
+    )
+    parser.add_argument(
+        "--max-concurrent",
+        type=int,
+        default=5,
+        help="Maximum number of concurrent parse operations (default: 5)",
+    )
+
+    args = parser.parse_args()
+
+    input_dir = Path(args.input_dir)
+
+    # Validate input directory
+    if not input_dir.exists():
+        print(f"Error: Input directory does not exist: {input_dir}")
+        return
+
+    if not input_dir.is_dir():
+        print(f"Error: Input path is not a directory: {input_dir}")
+        return
+
+    # Get API key from environment (loaded from .env file)
+    api_key = os.getenv("LLAMA_CLOUD_API_KEY")
+    if not api_key:
+        print("Error: LLAMA_CLOUD_API_KEY not found. Please set it in your .env file")
+        return
+
+    # Run async function
+    asyncio.run(
+        parse_folder(
+            input_dir=input_dir,
+            max_concurrent=args.max_concurrent,
+            api_key=api_key,
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
@@ -17,6 +17,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "0cb82ca8",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "ef115dbe-b834-4639-828e-e2c11aef710b",
@@ -18,6 +18,13 @@
    "| Aug-18-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -14,6 +14,13 @@
    "| Aug-18-2025   | N/A     | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -14,6 +14,13 @@
    "| Aug-18-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -18,6 +18,13 @@
    "| Aug-18-2025   | 0.6.61  | Maintained |\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -19,6 +19,14 @@
    "| Aug-18-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "bb595498",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "a004db48-8d3f-421c-915a-477692f71b90",
@@ -16,6 +16,13 @@
    "| Aug-19-2025   | 0.6.61  | Deprecated |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "8b937443",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "a004db48-8d3f-421c-915a-477692f71b90",
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "037cc6d9",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "a004db48-8d3f-421c-915a-477692f71b90",
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "7aa3be47",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -21,6 +21,13 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -8,6 +8,14 @@
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/demo_starter_multimodal.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "da52cfa3",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "4e081457",
@@ -7,6 +7,13 @@
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/demo_starter_parse_selected_pages.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -14,6 +14,13 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -17,6 +17,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "a3636937",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "5f7d99ad-6ebd-47d0-92a7-566630b0c22a",
@@ -7,6 +7,13 @@
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/excel/o1_excel_rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -17,6 +17,14 @@
    "| Before Feb 2025   | N/A  | Deprecated |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "0facb0b9",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "e8db8ac2-5221-44de-a53e-cb5ab37ac8f5",
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "bb943339",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "17e62444",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -19,6 +19,14 @@
    "| Aug-19-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "fe7e837a",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "15e60ecf-519c-41fc-911b-765adaf8bad4",
@@ -9,6 +9,13 @@
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/multimodal/insurance_rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -23,6 +23,13 @@
    "- [US Immigration Case](https://github.com/user-attachments/files/16536446/us_immigration_case.pdf)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -27,6 +27,14 @@
    "![mm_rag_diagram](./multimodal_contextual_retrieval_rag_img.png)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "93d4f9ab",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "54e8d9a7-5036-4d32-818f-00b2e888521f",
@@ -27,6 +27,14 @@
    "![mm_rag_diagram](./multimodal_rag_slide_deck_img.png)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "fc1b5803",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "54e8d9a7-5036-4d32-818f-00b2e888521f",
@@ -19,6 +19,14 @@
    "| Aug-20-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "7dafd458",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "54e8d9a7-5036-4d32-818f-00b2e888521f",
@@ -21,6 +21,14 @@
    "We use our workflow abstraction to define an agentic system that contains two main phases: a research phase that pulls in relevant files through chunk-level or file-level retrieval, and then a blog generation phase that synthesizes the final report."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "8c881021",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "54e8d9a7-5036-4d32-818f-00b2e888521f",
@@ -9,6 +9,13 @@
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/multimodal/product_manual_rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -19,6 +19,14 @@
    "| Prior to Feb-2025   | N/A  | Deprecated |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "b27f0e78",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -14,6 +14,13 @@
    "| Prior to Feb-2025   | N/A  | Deprecated |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -29,6 +29,13 @@
    "In this demonstration, we showcase how parsing instructions can be used to extract specific information from unstructured documents. Using a McDonald's Receipt, we show how to ignore parts of the document and only parse the price of each order and the final amount to be paid."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -18,6 +18,13 @@
    "Many documents can have varying complexity across pages - some pages have text, and other pages have images. The text-only pages only require cheap parsing modes, whereas the image-based pages require more advanced modes. In this notebook we show you how to take advantage of \"auto mode\" in LlamaParse which adaptively parses different pages according to different modes, which lets you get optimal performance at the cheapest cost.\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -37,6 +37,13 @@
    "With visual references, you can build applications that preserve document structure and provide users with trustworthy, traceable visual citations. We will now leverage this feature to build our query engine."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -24,6 +24,13 @@
    "| Aug-18-2025   | 0.6.61  | Maintained |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -26,6 +26,14 @@
    "We use LlamaParse to parse the context documents as well as the RFP document itself."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "ad140aef",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -22,6 +22,14 @@
    "**NOTE**: The pricing for LlamaParse + gpt4o is an order more expensive than using LlamaParse by default. Currently, every page parsed with gpt4o counts for 10 pages in the LlamaParse usage tracker.\n"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "211c52fe",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -1,4 +1,9 @@
 """
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+"""
+"""
 Generate sample spreadsheets for LlamaSheets + Claude workflows.

 This script creates example Excel files that demonstrate different use cases:
@@ -1,3 +1,8 @@
+"""
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+"""
 """Helper script to extract spreadsheets using LlamaSheets."""

 import asyncio
@@ -1,4 +1,9 @@
 """
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+"""
+"""
 Generate sample spreadsheets for LlamaSheets + LlamaIndex Agent workflows.

 This script creates example Excel files that demonstrate different use cases:
@@ -1,4 +1,9 @@
 """
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+"""
+"""
 LlamaSheets Agent with LlamaIndex

 This example shows how to build an agent that can work with spreadsheet data
@@ -1,3 +1,8 @@
+"""
+⚠️ DEPRECATION NOTICE:
+This example uses the deprecated llama-cloud-services package, which will be maintained until May 1, 2026.
+Please migrate to: pip install "llama-cloud>=1.0" (https://github.com/run-llama/llama-cloud-py)
+"""
 """Helper script to extract spreadsheets using LlamaSheets."""

 import asyncio
@@ -0,0 +1,547 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Document Splitting with LlamaCloud\n",
+    "\n",
+    "This notebook demonstrates how to use the LlamaCloud **Split** API to automatically segment a concatenated PDF into logical document sections based on content categories.\n",
+    "\n",
+    "## Use Case\n",
+    "\n",
+    "When dealing with large PDFs that contain multiple distinct documents or sections (e.g., a bundle of research papers, a collection of reports), you often need to split them into individual segments. The Split API uses AI to:\n",
+    "\n",
+    "1. Analyze each page's content\n",
+    "2. Classify pages into user-defined categories\n",
+    "3. Group consecutive pages of the same category into segments\n",
+    "\n",
+    "## Example Document\n",
+    "\n",
+    "We'll use a PDF containing three concatenated documents:\n",
+    "- **Alan Turing's essay** \"Intelligent Machinery, A Heretical Theory\" (an essay)\n",
+    "- **ImageNet paper** (a research paper)\n",
+    "- **\"Attention is All You Need\"** paper (a research paper)\n",
+    "\n",
+    "We'll split this into segments categorized as either `essay` or `research_paper`.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install \"llama-cloud>=1.0\"` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: llama-cloud in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (0.1.44)\n",
+      "Requirement already satisfied: python-dotenv in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (1.2.1)\n",
+      "Requirement already satisfied: requests in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (2.32.5)\n",
+      "Requirement already satisfied: certifi>=2024.7.4 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from llama-cloud) (2025.11.12)\n",
+      "Requirement already satisfied: httpx>=0.20.0 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from llama-cloud) (0.28.1)\n",
+      "Requirement already satisfied: pydantic>=1.10 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from llama-cloud) (2.12.5)\n",
+      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from requests) (3.4.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from requests) (3.11)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from requests) (2.5.0)\n",
+      "Requirement already satisfied: anyio in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from httpx>=0.20.0->llama-cloud) (4.11.0)\n",
+      "Requirement already satisfied: httpcore==1.* in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from httpx>=0.20.0->llama-cloud) (1.0.9)\n",
+      "Requirement already satisfied: h11>=0.16 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.20.0->llama-cloud) (0.16.0)\n",
+      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from pydantic>=1.10->llama-cloud) (0.7.0)\n",
+      "Requirement already satisfied: pydantic-core==2.41.5 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from pydantic>=1.10->llama-cloud) (2.41.5)\n",
+      "Requirement already satisfied: typing-extensions>=4.14.1 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from pydantic>=1.10->llama-cloud) (4.15.0)\n",
+      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from pydantic>=1.10->llama-cloud) (0.4.2)\n",
+      "Requirement already satisfied: sniffio>=1.1 in /Users/javier/llama_cloud_services/.venv/lib/python3.11/site-packages (from anyio->httpx>=0.20.0->llama-cloud) (1.3.1)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install required packages\n",
+    "%pip install llama-cloud python-dotenv requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "✅ API configured with base URL: https://api.cloud.llamaindex.ai\n",
+      "✅ Project ID: using default project\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import time\n",
+    "import requests\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Load environment variables\n",
+    "load_dotenv()\n",
+    "\n",
+    "# Configuration\n",
+    "LLAMA_CLOUD_API_KEY = os.environ.get(\"LLAMA_CLOUD_API_KEY\", \"llx-...\")\n",
+    "BASE_URL = os.environ.get(\"LLAMA_CLOUD_BASE_URL\", \"https://api.cloud.llamaindex.ai\")\n",
+    "PROJECT_ID = os.environ.get(\"LLAMA_CLOUD_PROJECT_ID\", None)\n",
+    "\n",
+    "# Headers for API requests\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {LLAMA_CLOUD_API_KEY}\",\n",
+    "    \"Content-Type\": \"application/json\",\n",
+    "}\n",
+    "\n",
+    "print(f\"✅ API configured with base URL: {BASE_URL}\")\n",
+    "print(f\"✅ Project ID: {PROJECT_ID or 'using default project'}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Upload the PDF File\n",
+    "\n",
+    "First, we'll upload our concatenated PDF to LlamaCloud using the Files API. This can be done using the `llama-cloud` SDK.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "📤 Uploading ./data/turing+imagenet+attention.pdf...\n",
+      "✅ File uploaded successfully!\n",
+      "   File name: turing+imagenet+attention.pdf\n"
+     ]
+    }
+   ],
+   "source": [
+    "from llama_cloud.client import LlamaCloud\n",
+    "\n",
+    "# Initialize the client\n",
+    "client = LlamaCloud(token=LLAMA_CLOUD_API_KEY, base_url=BASE_URL)\n",
+    "\n",
+    "# Path to the PDF file\n",
+    "pdf_path = \"./data/turing+imagenet+attention.pdf\"\n",
+    "\n",
+    "# Upload the file\n",
+    "print(f\"📤 Uploading {pdf_path}...\")\n",
+    "\n",
+    "with open(pdf_path, \"rb\") as f:\n",
+    "    uploaded_file = client.files.upload_file(upload_file=f, project_id=PROJECT_ID)\n",
+    "\n",
+    "file_id = uploaded_file.id\n",
+    "print(f\"✅ File uploaded successfully!\")\n",
+    "print(f\"   File name: {uploaded_file.name}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Create a Split Job\n",
+    "\n",
+    "Now we'll create a split job using the Split API. Since the Split API is in beta and not yet available in the SDK, we'll use raw HTTP requests.\n",
+    "\n",
+    "We define two categories:\n",
+    "- **essay**: For philosophical or reflective writing\n",
+    "- **research_paper**: For formal academic documents with methodology and citations\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🔄 Creating split job...\n",
+      "✅ Split job created!\n",
+      "   Job ID: spl-zsssb632a742aikliu96pqkb56t5\n",
+      "   Status: pending\n",
+      "   Categories: ['essay', 'research_paper']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Define the split job request\n",
+    "split_request = {\n",
+    "    \"document_input\": {\n",
+    "        \"type\": \"file_id\",  # only file_id is supported for now\n",
+    "        \"value\": file_id,\n",
+    "    },\n",
+    "    \"categories\": [\n",
+    "        {\n",
+    "            \"name\": \"essay\",\n",
+    "            \"description\": \"A philosophical or reflective piece of writing that presents personal viewpoints, arguments, or thoughts on a topic without strict formal structure\",\n",
+    "        },\n",
+    "        {\n",
+    "            \"name\": \"research_paper\",\n",
+    "            \"description\": \"A formal academic document presenting original research, methodology, experiments, results, and conclusions with citations and references\",\n",
+    "        },\n",
+    "    ],\n",
+    "}\n",
+    "\n",
+    "# Create the split job\n",
+    "print(\"🔄 Creating split job...\")\n",
+    "response = requests.post(\n",
+    "    f\"{BASE_URL}/api/v1/beta/split/jobs\",\n",
+    "    params={\"project_id\": PROJECT_ID},\n",
+    "    headers=headers,\n",
+    "    json=split_request,\n",
+    ")\n",
+    "response.raise_for_status()\n",
+    "\n",
+    "split_job = response.json()\n",
+    "job_id = split_job[\"id\"]\n",
+    "\n",
+    "print(f\"✅ Split job created!\")\n",
+    "print(f\"   Job ID: {job_id}\")\n",
+    "print(f\"   Status: {split_job['status']}\")\n",
+    "print(f\"   Categories: {[c['name'] for c in split_job['categories']]}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Poll for Job Completion\n",
+    "\n",
+    "The split job runs asynchronously. We'll poll the job status until it completes.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "⏳ Waiting for split job to complete...\n",
+      "   Status: processing (elapsed: 0s)\n",
+      "   Status: processing (elapsed: 5s)\n",
+      "   Status: processing (elapsed: 11s)\n",
+      "   Status: completed (elapsed: 16s)\n",
+      "\n",
+      "✅ Split job completed successfully!\n"
+     ]
+    }
+   ],
+   "source": [
+    "def poll_split_job(job_id: str, max_wait_seconds: int = 180, poll_interval: int = 5):\n",
+    "    \"\"\"\n",
+    "    Poll a split job until it reaches a terminal state.\n",
+    "\n",
+    "    Args:\n",
+    "        job_id: The split job ID\n",
+    "        max_wait_seconds: Maximum time to wait for completion\n",
+    "        poll_interval: Seconds between poll attempts\n",
+    "\n",
+    "    Returns:\n",
+    "        The completed job response\n",
+    "    \"\"\"\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    while (time.time() - start_time) < max_wait_seconds:\n",
+    "        response = requests.get(\n",
+    "            f\"{BASE_URL}/api/v1/beta/split/jobs/{job_id}\",\n",
+    "            params={\"project_id\": PROJECT_ID},\n",
+    "            headers=headers,\n",
+    "        )\n",
+    "        response.raise_for_status()\n",
+    "        job = response.json()\n",
+    "\n",
+    "        status = job[\"status\"]\n",
+    "        elapsed = int(time.time() - start_time)\n",
+    "        print(f\"   Status: {status} (elapsed: {elapsed}s)\")\n",
+    "\n",
+    "        if status in [\"completed\", \"failed\"]:\n",
+    "            return job\n",
+    "\n",
+    "        time.sleep(poll_interval)\n",
+    "\n",
+    "    raise TimeoutError(f\"Job did not complete within {max_wait_seconds} seconds\")\n",
+    "\n",
+    "\n",
+    "print(\"⏳ Waiting for split job to complete...\")\n",
+    "completed_job = poll_split_job(job_id)\n",
+    "\n",
+    "if completed_job[\"status\"] == \"completed\":\n",
+    "    print(\"\\n✅ Split job completed successfully!\")\n",
+    "else:\n",
+    "    print(\n",
+    "        f\"\\n❌ Split job failed: {completed_job.get('error_message', 'Unknown error')}\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4: Analyze the Results\n",
+    "\n",
+    "Let's examine the split results to see how the document was segmented.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "📊 Split Results Summary\n",
+      "==================================================\n",
+      "Total segments found: 3\n",
+      "\n",
+      "Segments by category:\n",
+      "   • essay: 1 segment(s)\n",
+      "   • research_paper: 2 segment(s)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get the segments from the result\n",
+    "segments = completed_job.get(\"result\", {}).get(\"segments\", [])\n",
+    "\n",
+    "print(f\"📊 Split Results Summary\")\n",
+    "print(f\"=\" * 50)\n",
+    "print(f\"Total segments found: {len(segments)}\")\n",
+    "print()\n",
+    "\n",
+    "# Count by category\n",
+    "category_counts = {}\n",
+    "for segment in segments:\n",
+    "    cat = segment[\"category\"]\n",
+    "    category_counts[cat] = category_counts.get(cat, 0) + 1\n",
+    "\n",
+    "print(\"Segments by category:\")\n",
+    "for cat, count in category_counts.items():\n",
+    "    print(f\"   • {cat}: {count} segment(s)\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "📄 Segment Details\n",
+      "==================================================\n",
+      "\n",
+      "Segment 1:\n",
+      "   Category: essay\n",
+      "   Pages 1-4 (4 pages)\n",
+      "   Confidence: high\n",
+      "\n",
+      "Segment 2:\n",
+      "   Category: research_paper\n",
+      "   Pages 5-13 (9 pages)\n",
+      "   Confidence: high\n",
+      "\n",
+      "Segment 3:\n",
+      "   Category: research_paper\n",
+      "   Pages 14-24 (11 pages)\n",
+      "   Confidence: high\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Display detailed segment information\n",
+    "print(f\"\\n📄 Segment Details\")\n",
+    "print(f\"=\" * 50)\n",
+    "\n",
+    "for i, segment in enumerate(segments, 1):\n",
+    "    category = segment[\"category\"]\n",
+    "    pages = segment[\"pages\"]\n",
+    "    confidence = segment[\"confidence_category\"]\n",
+    "\n",
+    "    # Format page range\n",
+    "    if len(pages) == 1:\n",
+    "        page_range = f\"Page {pages[0]}\"\n",
+    "    else:\n",
+    "        page_range = f\"Pages {min(pages)}-{max(pages)}\"\n",
+    "\n",
+    "    print(f\"\\nSegment {i}:\")\n",
+    "    print(f\"   Category: {category}\")\n",
+    "    print(f\"   {page_range} ({len(pages)} page{'s' if len(pages) > 1 else ''})\")\n",
+    "    print(f\"   Confidence: {confidence}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Expected Results\n",
+    "\n",
+    "Based on our test document, we expect:\n",
+    "- **1 essay segment**: Alan Turing's \"Intelligent Machinery, A Heretical Theory\"\n",
+    "- **2 research paper segments**: ImageNet paper and \"Attention is All You Need\" paper\n",
+    "\n",
+    "The pages should be grouped consecutively, with no overlap between segments.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "✅ Validation\n",
+      "==================================================\n",
+      "Total pages assigned: 24\n",
+      "Unique pages: 24\n",
+      "✅ No page overlap detected - each page belongs to exactly one segment\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Verify no page overlap\n",
+    "all_pages = []\n",
+    "for segment in segments:\n",
+    "    all_pages.extend(segment[\"pages\"])\n",
+    "\n",
+    "unique_pages = set(all_pages)\n",
+    "\n",
+    "print(f\"\\n✅ Validation\")\n",
+    "print(f\"=\" * 50)\n",
+    "print(f\"Total pages assigned: {len(all_pages)}\")\n",
+    "print(f\"Unique pages: {len(unique_pages)}\")\n",
+    "\n",
+    "if len(all_pages) == len(unique_pages):\n",
+    "    print(f\"✅ No page overlap detected - each page belongs to exactly one segment\")\n",
+    "else:\n",
+    "    print(\n",
+    "        f\"⚠️  Page overlap detected - {len(all_pages) - len(unique_pages)} duplicate assignments\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Using `allow_uncategorized` Strategy\n",
+    "\n",
+    "You can also use the `allow_uncategorized` splitting strategy. This is useful when you want to capture pages that don't match any defined category.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "📝 With allow_uncategorized=True and only 'essay' category defined,\n",
+      "   pages that don't match 'essay' will be grouped as 'uncategorized'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example with allow_uncategorized strategy\n",
+    "split_request_uncategorized = {\n",
+    "    \"document_input\": {\"type\": \"file_id\", \"value\": file_id},\n",
+    "    \"categories\": [\n",
+    "        {\n",
+    "            \"name\": \"essay\",\n",
+    "            \"description\": \"A philosophical or reflective piece of writing that presents personal viewpoints, arguments, or thoughts on a topic\",\n",
+    "        }\n",
+    "        # Note: We only define 'essay' category\n",
+    "        # Research papers will be classified as 'uncategorized'\n",
+    "    ],\n",
+    "    \"splitting_strategy\": {\"allow_uncategorized\": True},\n",
+    "}\n",
+    "\n",
+    "print(\"📝 With allow_uncategorized=True and only 'essay' category defined,\")\n",
+    "print(\"   pages that don't match 'essay' will be grouped as 'uncategorized'.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "\n",
+    "The LlamaCloud Split API provides a powerful way to automatically segment concatenated documents based on content categories. This is useful for:\n",
+    "\n",
+    "- **Document processing pipelines**: Automatically separate bundled documents before further processing\n",
+    "- **Content organization**: Categorize and organize mixed document collections\n",
+    "- **Information extraction**: Identify different document types within a single file\n",
+    "\n",
+    "### Key Features\n",
+    "\n",
+    "- **AI-powered classification**: Uses LLMs to understand page content and assign categories\n",
+    "- **Flexible categories**: Define any categories relevant to your use case\n",
+    "- **Confidence scoring**: Each segment includes a confidence level\n",
+    "- **Page-level granularity**: Results include exact page numbers for each segment\n",
+    "\n",
+    "### API Reference\n",
+    "\n",
+    "- **Create Split Job**: `POST /api/v1/beta/split/jobs`\n",
+    "- **Get Split Job**: `GET /api/v1/beta/split/jobs/{job_id}`\n",
+    "- **List Split Jobs**: `GET /api/v1/beta/split/jobs`\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -1,403 +0,0 @@
-# LlamaExtract
-
-LlamaExtract provides a simple API for extracting structured data from unstructured documents like PDFs, text files and images.
-
-## Table of Contents
-
- [Quick Start](#quick-start)
-  - [Supported File Types](#supported-file-types)
-  - [Different Input Types](#different-input-types)
-  - [Async Extraction](#async-extraction)
- [Core Concepts](#core-concepts)
- [Defining Schemas](#defining-schemas)
-  - [Using Pydantic (Recommended)](#using-pydantic-recommended)
-  - [Using JSON Schema](#using-json-schema)
-  - [Important restrictions on JSON/Pydantic Schema](#important-restrictions-on-jsonpydantic-schema)
- [Extraction Configuration](#extraction-configuration)
-  - [Key Configuration Options](#key-configuration-options)
- [Extraction Agents (Advanced)](#extraction-agents-advanced)
-  - [Creating Agents](#creating-agents)
-  - [Agent Batch Processing](#agent-batch-processing)
-  - [Updating Agent Schemas](#updating-agent-schemas)
-  - [Managing Agents](#managing-agents)
-  - [When to Use Agents vs Direct Extraction](#when-to-use-agents-vs-direct-extraction)
- [Installation](#installation)
- [Tips & Best Practices](#tips--best-practices)
- [Additional Resources](#additional-resources)
-
-## Quick Start
-
-The simplest way to get started is to use the stateless API with the extraction configuration and the file/text to extract from:
-
-```python
-from llama_cloud_services import LlamaExtract
-from llama_cloud import ExtractConfig, ExtractMode
-from pydantic import BaseModel, Field
-
-# Initialize client
-extractor = LlamaExtract(api_key="YOUR_API_KEY")
-
-
-# Define schema using Pydantic
-class Resume(BaseModel):
-    name: str = Field(description="Full name of candidate")
-    email: str = Field(description="Email address")
-    skills: list[str] = Field(description="Technical skills and technologies")
-
-
-# Configure extraction settings
-config = ExtractConfig(extraction_mode=ExtractMode.FAST)
-
-# Extract data directly from document - no agent needed!
-result = extractor.extract(Resume, config, "resume.pdf")
-print(result.data)
-```
-
-### Supported File Types
-
-LlamaExtract supports the following file formats:
-
- **Documents**: PDF (.pdf), Word (.docx)
- **Text files**: Plain text (.txt), CSV (.csv), JSON (.json), HTML (.html, .htm), Markdown (.md)
- **Images**: PNG (.png), JPEG (.jpg, .jpeg)
-
-### Different Input Types
-
-```python
-# From file path (string or Path)
-result = extractor.extract(Resume, config, "resume.pdf")
-
-# From file handle
-with open("resume.pdf", "rb") as f:
-    result = extractor.extract(Resume, config, f)
-
-# From bytes with filename
-with open("resume.pdf", "rb") as f:
-    file_bytes = f.read()
-from llama_cloud_services.extract import SourceText
-
-result = extractor.extract(
-    Resume, config, SourceText(file=file_bytes, filename="resume.pdf")
-)
-
-# From text content
-text = "Name: John Doe\nEmail: john@example.com\nSkills: Python, AI"
-result = extractor.extract(Resume, config, SourceText(text_content=text))
-```
-
-### Async Extraction
-
-Use the async API for better performance with multiple files or when integrating with async applications.
-`queue_extraction` enqueues the extraction jobs and returns immediately. Alternatively, you
-can use `aextract` to poll for the job and return the extraction results.
-
-```python
-import asyncio
-
-
-async def extract_resumes():
-    # Async extraction
-    result = await extractor.aextract(Resume, config, "resume.pdf")
-    print(result.data)
-
-    # Queue extraction jobs (returns immediately)
-    jobs = await extractor.queue_extraction(
-        Resume, config, ["resume1.pdf", "resume2.pdf"]
-    )
-    print(f"Queued {len(jobs)} extraction jobs")
-    return jobs
-
-
-# Run async function
-jobs = asyncio.run(extract_resumes())
-# Check job status
-for job in jobs:
-    status = agent.get_extraction_job(job.id).status
-    print(f"Job {job.id}: {status}")
-
-# Get results when complete
-results = [agent.get_extraction_run_for_job(job.id) for job in jobs]
-```
-
-## Core Concepts
-
- **Data Schema**: Structure definition for the data you want to extract in the form of a JSON schema or a Pydantic model.
- **Extraction Config**: Settings that control how extraction is performed (e.g., speed vs accuracy trade-offs).
- **Extraction Jobs**: Asynchronous extraction tasks that can be monitored.
- **Extraction Agents** (Advanced): Reusable extractors configured with a specific schema and extraction settings.
-
-## Defining Schemas
-
-Schemas define the structure of data you want to extract. You can use either Pydantic models or JSON Schema:
-
-### Using Pydantic (Recommended)
-
-```python
-from pydantic import BaseModel, Field
-from typing import List, Optional
-from llama_cloud import ExtractConfig, ExtractMode
-
-
-class Experience(BaseModel):
-    company: str = Field(description="Company name")
-    title: str = Field(description="Job title")
-    start_date: Optional[str] = Field(description="Start date of employment")
-    end_date: Optional[str] = Field(description="End date of employment")
-
-
-class Resume(BaseModel):
-    name: str = Field(description="Candidate name")
-    experience: List[Experience] = Field(description="Work history")
-
-
-# Use the schema for extraction
-config = ExtractConfig(extraction_mode=ExtractMode.FAST)
-result = extractor.extract(Resume, config, "resume.pdf")
-```
-
-### Using JSON Schema
-
-```python
-schema = {
-    "type": "object",
-    "properties": {
-        "name": {"type": "string", "description": "Candidate name"},
-        "experience": {
-            "type": "array",
-            "description": "Work history",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "company": {
-                        "type": "string",
-                        "description": "Company name",
-                    },
-                    "title": {"type": "string", "description": "Job title"},
-                    "start_date": {
-                        "anyOf": [{"type": "string"}, {"type": "null"}],
-                        "description": "Start date of employment",
-                    },
-                    "end_date": {
-                        "anyOf": [{"type": "string"}, {"type": "null"}],
-                        "description": "End date of employment",
-                    },
-                },
-            },
-        },
-    },
-}
-
-# Use the schema for extraction
-config = ExtractConfig(extraction_mode=ExtractMode.FAST)
-result = extractor.extract(schema, config, "resume.pdf")
-```
-
-### Important restrictions on JSON/Pydantic Schema
-
-_LlamaExtract only supports a subset of the JSON Schema specification._ While limited, it should
-be sufficient for a wide variety of use-cases.
-
- All fields are required by default. Nullable fields must be explicitly marked as such,
-  using `anyOf` with a `null` type. See `"start_date"` field above.
- Root node must be of type `object`.
- Schema nesting must be limited to within 5 levels.
- The important fields are key names/titles, type and description. Fields for
-  formatting, default values, etc. are **not supported**. If you need these, you can add the
-  restrictions to your field description and/or use a post-processing step. e.g. default values can be supported by making a field optional and then setting `"null"` values from the extraction result to the default value.
- There are other restrictions on number of keys, size of the schema, etc. that you may
-  hit for complex extraction use cases. In such cases, it is worth thinking how to restructure
-  your extraction workflow to fit within these constraints, e.g. by extracting subset of fields
-  and later merging them together.
-
-## Extraction Configuration
-
-Configure how extraction is performed using `ExtractConfig`. The schema is the most important part, but several configuration options can significantly impact the extraction process.
-
-```python
-from llama_cloud import ExtractConfig, ExtractMode, ChunkMode, ExtractTarget
-
-# Basic configuration
-config = ExtractConfig(
-    extraction_mode=ExtractMode.BALANCED,  # FAST, BALANCED, MULTIMODAL, PREMIUM
-    extraction_target=ExtractTarget.PER_DOC,  # PER_DOC, PER_PAGE
-    system_prompt="Focus on the most recent data",
-    page_range="1-5,10-15",  # Extract from specific pages
-)
-
-# Advanced configuration
-advanced_config = ExtractConfig(
-    extraction_mode=ExtractMode.MULTIMODAL,
-    chunk_mode=ChunkMode.PAGE,  # PAGE, SECTION
-    high_resolution_mode=True,  # Better OCR accuracy
-    invalidate_cache=False,  # Set to True to bypass cached results
-    cite_sources=True,  # Enable source citations
-    use_reasoning=True,  # Enable reasoning (not in FAST mode)
-    confidence_scores=True,  # MULTIMODAL/PREMIUM only
-)
-```
-
-### Key Configuration Options
-
-**Extraction Mode**: Controls processing quality and speed
-
- `FAST`: Fastest processing, suitable for simple documents with no OCR
- `BALANCED`: Good speed/accuracy tradeoff for text-rich documents
- `MULTIMODAL`: For visually rich documents with text, tables, and images (recommended)
- `PREMIUM`: Highest accuracy with OCR, complex table/header detection
-
-**Extraction Target**: Defines extraction scope
-
- `PER_DOC`: Apply schema to entire document (default)
- `PER_PAGE`: Apply schema to each page, returns array of results
-
-**Advanced Options**:
-
- `system_prompt`: Additional system-level instructions
- `page_range`: Specific pages to extract (e.g., "1,3,5-7,9")
- `chunk_mode`: Document splitting strategy (`PAGE` or `SECTION`)
- `high_resolution_mode`: Better OCR for small text (slower processing)
-
-**Extensions** (return additional metadata):
-
- `cite_sources`: Source tracing for extracted fields
- `use_reasoning`: Explanations for extraction decisions
- `confidence_scores`: Quantitative confidence measures (MULTIMODAL/PREMIUM only)
-
-For complete configuration options, advanced settings, and detailed examples, see the [LlamaExtract Configuration Documentation](https://docs.cloud.llamaindex.ai/llamaextract/features/options).
-
-## Extraction Agents (Advanced)
-
-For reusable extraction workflows, you can create extraction agents that encapsulate both schema and configuration:
-
-### Creating Agents
-
-```python
-from llama_cloud_services import LlamaExtract
-from llama_cloud import ExtractConfig, ExtractMode
-from pydantic import BaseModel, Field
-
-# Initialize client
-extractor = LlamaExtract()
-
-
-# Define schema
-class Resume(BaseModel):
-    name: str = Field(description="Full name of candidate")
-    email: str = Field(description="Email address")
-    skills: list[str] = Field(description="Technical skills and technologies")
-
-
-# Configure extraction settings
-config = ExtractConfig(extraction_mode=ExtractMode.FAST)
-
-# Create extraction agent
-agent = extractor.create_agent(
-    name="resume-parser", data_schema=Resume, config=config
-)
-
-# Use the agent
-result = agent.extract("resume.pdf")
-print(result.data)
-```
-
-### Agent Batch Processing
-
-Process multiple files with an agent:
-
-```python
-# Queue multiple files for extraction
-jobs = await agent.queue_extraction(["resume1.pdf", "resume2.pdf"])
-
-# Check job status
-for job in jobs:
-    status = agent.get_extraction_job(job.id).status
-    print(f"Job {job.id}: {status}")
-
-# Get results when complete
-results = [agent.get_extraction_run_for_job(job.id) for job in jobs]
-```
-
-### Updating Agent Schemas
-
-Schemas can be modified and updated after creation:
-
-```python
-# Update schema
-agent.data_schema = new_schema
-
-# Save changes
-agent.save()
-```
-
-### Managing Agents
-
-```python
-# List all agents
-agents = extractor.list_agents()
-
-# Get specific agent
-agent = extractor.get_agent(name="resume-parser")
-
-# Delete agent
-extractor.delete_agent(agent.id)
-```
-
-### When to Use Agents vs Direct Extraction
-
-**Use Direct Extraction When:**
-
- One-off extractions
- Different schemas for different documents
- Simple workflows
- Getting started quickly
-
-**Use Extraction Agents When:**
-
- Repeated extractions with the same schema
- Team collaboration (shared, named extractors)
- Complex workflows requiring state management
- Production systems with consistent extraction patterns
-
-## Installation
-
-```bash
-pip install llama-cloud-services
-```
-
-## Tips & Best Practices
-
-At the core of LlamaExtract is the schema, which defines the structure of the data you want to extract from your documents.
-
-1. **Schema Design**:
-
-   - Try to limit schema nesting to 3-4 levels.
-   - Make fields optional when data might not always be present. Having required fields may force the model
-     to hallucinate when these fields are not present in the documents.
-   - When you want to extract a variable number of entities, use an `array` type. However, note that you cannot use
-     an `array` type for the root node.
-   - Use descriptive field names and detailed descriptions. Use descriptions to pass formatting
-     instructions or few-shot examples.
-   - Above all, start simple and iteratively build your schema to incorporate requirements.
-
-2. **Running Extractions**:
-   - Note that reassigning `agent.data_schema` will not save the schema to the database
-     until you call `agent.save()`, but it will be used for running extractions.
-   - Check extraction results for any errors. Error information is available in the `result.error` field for debugging.
-   - Consider async operations (`aextract` or `queue_extraction`) for large-scale extraction or when processing multiple files.
-   - For repeated extractions with the same schema, consider creating an extraction agent to avoid redefining the schema each time.
-
-### Hitting "The response was too long to be processed" Error
-
-This implies that the extraction response is hitting output token limits of the LLM. In such cases, it is worth rethinking the design of your schema to enable a more efficient/scalable extraction. For example:
-
- Instead of one field that extracts a complex object, you can use multiple fields to distribute the extraction logic.
- You can also use multiple schemas to extract different subsets of fields from the same document and merge them later.
-
-Another option (orthogonal to the above) is to break the document into smaller sections and extract from each section individually, when possible. LlamaExtract will in most cases be able to handle both document and schema chunking automatically, but there are cases where you may need to do this manually.
-
-## Additional Resources
-
- [Extract Documentation](https://docs.cloud.llamaindex.ai/llamaextract/getting_started) - Details on Extract features, API and examples.
- [Example Notebook](docs/examples-py/extract/resume_screening.ipynb) - Detailed walkthrough of resume parsing
- [Example Application with TypeScript](./examples-ts/extract/) - End-to-end examples using LlamaExtract TypeScript client.
- [Discord Community](https://discord.com/invite/eN6D2HQ4aX) - Get help and share feedback
@@ -1,86 +0,0 @@
-# LlamaCloud Index + Retriever
-
-LlamaCloud is a new generation of managed parsing, ingestion, and retrieval services, designed to bring production-grade context-augmentation to your LLM and RAG applications.
-
-Currently, LlamaCloud supports
-
- Managed Ingestion API, handling parsing and document management
- Managed Retrieval API, configuring optimal retrieval for your RAG system
-
-## Access
-
-We are opening up a private beta to a limited set of enterprise partners for the managed ingestion and retrieval API. If you’re interested in centralizing your data pipelines and spending more time working on your actual RAG use cases, come [talk to us.](https://www.llamaindex.ai/contact)
-
-If you have access to LlamaCloud, you can visit [LlamaCloud](https://cloud.llamaindex.ai) to sign in and get an API key.
-
-## Setup
-
-First, make sure you have the latest LlamaIndex version installed.
-
-```
-pip uninstall llama-index  # run this if upgrading from v0.9.x or older
-pip install -U llama-index --upgrade --no-cache-dir --force-reinstall
-```
-
-The `llama-index-indices-managed-llama-cloud` package is included with the above install, but you can also install directly
-
-```
-pip install -U llama-index-indices-managed-llama-cloud
-```
-
-## Usage
-
-You can create an index on LlamaCloud using the following code. By default, new indexes use managed embeddings (OpenAI text-embedding-3-small, 1536 dimensions, 1 credit/page):
-
-```python
-import os
-
-os.environ[
-    "LLAMA_CLOUD_API_KEY"
-] = "llx-..."  # can provide API-key in env or in the constructor later on
-
-from llama_index.core import SimpleDirectoryReader
-from llama_cloud_services import LlamaCloudIndex
-
-# create a new index (uses managed embeddings by default)
-index = LlamaCloudIndex.from_documents(
-    documents,
-    "my_first_index",
-    project_name="default",
-    api_key="llx-...",
-    verbose=True,
-)
-
-# connect to an existing index
-index = LlamaCloudIndex("my_first_index", project_name="default")
-```
-
-You can also configure a retriever for managed retrieval:
-
-```python
-# from the existing index
-index.as_retriever()
-
-# from scratch
-from llama_index.indices.managed.llama_cloud import LlamaCloudRetriever
-
-retriever = LlamaCloudRetriever("my_first_index", project_name="default")
-```
-
-And of course, you can use other index shortcuts to get use out of your new managed index:
-
-```python
-query_engine = index.as_query_engine(llm=llm)
-
-chat_engine = index.as_chat_engine(llm=llm)
-```
-
-## Retriever Settings
-
-A full list of retriever settings/kwargs is below:
-
- `dense_similarity_top_k`: Optional[int] -- If greater than 0, retrieve `k` nodes using dense retrieval
- `sparse_similarity_top_k`: Optional[int] -- If greater than 0, retrieve `k` nodes using sparse retrieval
- `enable_reranking`: Optional[bool] -- Whether to enable reranking or not. Sacrifices some speed for accuracy
- `rerank_top_n`: Optional[int] -- The number of nodes to return after reranking initial retrieval results
- `alpha`: Optional[float] -- The weighting between dense and sparse retrieval. 1 = Full dense retrieval, 0 = Full sparse retrieval.
@@ -1,163 +0,0 @@
-# LlamaParse
-
-LlamaParse is a **GenAI-native document parser** that can parse complex document data for any downstream LLM use case (RAG, agents).
-
-It is really good at the following:
-
- ✅ **Broad file type support**: Parsing a variety of unstructured file types (.pdf, .pptx, .docx, .xlsx, .html) with text, tables, visual elements, weird layouts, and more.
- ✅ **Table recognition**: Parsing embedded tables accurately into text and semi-structured representations.
- ✅ **Multimodal parsing and chunking**: Extracting visual elements (images/diagrams) into structured formats and returning image chunks using the latest multimodal models.
- ✅ **Custom parsing**: Input custom prompt instructions to customize the output the way you want it.
-
-LlamaParse directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index).
-
-The free plan allows up to 1000 pages a day. The paid plan includes 7k free pages per week, plus 0.3c per additional page by default. There is a sandbox available to test the API [**https://cloud.llamaindex.ai/parse ↗**](https://cloud.llamaindex.ai/parse).
-
-Read below for some quickstart information, or see the [full documentation](https://docs.cloud.llamaindex.ai/).
-
-If you're a company interested in enterprise RAG solutions, and/or high volume/on-prem usage of LlamaParse, come [talk to us](https://www.llamaindex.ai/contact).
-
-## Getting Started
-
-First, log in and get an API key from [**https://cloud.llamaindex.ai/api-key ↗**](https://cloud.llamaindex.ai/api-key).
-
-Then, install the package:
-
-`pip install llama-cloud-services`
-
-## CLI Usage
-
-Now you can parse your first PDF file using the command line interface. Use the command `llama-parse [file_paths]`. See the help text with `llama-parse --help`.
-
-```bash
-export LLAMA_CLOUD_API_KEY='llx-...'
-
-# output as text
-llama-parse my_file.pdf --result-type text --output-file output.txt
-
-# output as markdown
-llama-parse my_file.pdf --result-type markdown --output-file output.md
-
-# output as raw json
-llama-parse my_file.pdf --output-raw-json --output-file output.json
-```
-
-## Python Usage
-
-You can also create simple scripts:
-
-```python
-from llama_cloud_services import LlamaParse
-
-parser = LlamaParse(
-    api_key="llx-...",  # can also be set in your env as LLAMA_CLOUD_API_KEY
-    num_workers=4,  # if multiple files passed, split in `num_workers` API calls
-    verbose=True,
-    language="en",  # Optionally you can define a language, default=en
-)
-
-# sync
-result = parser.parse("./my_file.pdf")
-
-# sync batch
-results = parser.parse(["./my_file1.pdf", "./my_file2.pdf"])
-
-# async
-result = await parser.aparse("./my_file.pdf")
-
-# async batch
-results = await parser.aparse(["./my_file1.pdf", "./my_file2.pdf"])
-```
-
-The result object is a fully typed `JobResult` object, and you can interact with it to parse and transform various parts of the result:
-
-```python
-# get the llama-index markdown documents
-markdown_documents = result.get_markdown_documents(split_by_page=True)
-
-# get the llama-index text documents
-text_documents = result.get_text_documents(split_by_page=False)
-
-# get the image documents
-image_documents = result.get_image_documents(
-    include_screenshot_images=True,
-    include_object_images=False,
-    # Optional: download the images to a directory
-    # (default is to return the image bytes in ImageDocument objects)
-    image_download_dir="./images",
-)
-
-# access the raw job result
-# Items will vary based on the parser configuration
-for page in result.pages:
-    print(page.text)
-    print(page.md)
-    print(page.images)
-    print(page.layout)
-    print(page.structuredData)
-```
-
-See more details about the result object in the [example notebook](./docs/examples-py/parse/demo_json_tour.ipynb).
-
-### Using with file object / bytes
-
-You can parse a file object directly:
-
-```python
-from llama_cloud_services import LlamaParse
-
-parser = LlamaParse(
-    api_key="llx-...",  # can also be set in your env as LLAMA_CLOUD_API_KEY
-    num_workers=4,  # if multiple files passed, split in `num_workers` API calls
-    verbose=True,
-    language="en",  # Optionally you can define a language, default=en
-)
-
-file_name = "my_file1.pdf"
-extra_info = {"file_name": file_name}
-
-with open(f"./{file_name}", "rb") as f:
-    # must provide extra_info with a file_name key when passing a file object
-    result = parser.parse(f, extra_info=extra_info)
-
-# you can also pass file bytes directly
-with open(f"./{file_name}", "rb") as f:
-    file_bytes = f.read()
-    # must provide extra_info with a file_name key when passing file bytes
-    result = parser.parse(file_bytes, extra_info=extra_info)
-```
-
-### Using with `SimpleDirectoryReader`
-
-You can also integrate the parser as the default PDF loader in `SimpleDirectoryReader`:
-
-```python
-from llama_cloud_services import LlamaParse
-from llama_index.core import SimpleDirectoryReader
-
-parser = LlamaParse(
-    api_key="llx-...",  # can also be set in your env as LLAMA_CLOUD_API_KEY
-    result_type="markdown",  # "markdown" and "text" are available
-    verbose=True,
-)
-
-file_extractor = {".pdf": parser}
-documents = SimpleDirectoryReader(
-    "./data", file_extractor=file_extractor
-).load_data()
-```
-
-Full documentation for `SimpleDirectoryReader` can be found on the [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/module_guides/loading/simpledirectoryreader/).
-
-## Examples
-
-Several end-to-end indexing examples can be found in the examples folder
-
- [Getting Started](docs/examples-py/parse/demo_basic.ipynb)
- [Advanced RAG Example](docs/examples-py/parse/demo_advanced.ipynb)
- [Raw API Usage](docs/examples-py/parse/demo_api.ipynb)
- [Result Object Tour](docs/examples-py/parse/demo_json_tour.ipynb)
-
-## Documentation
-
-[https://docs.cloud.llamaindex.ai/](https://docs.cloud.llamaindex.ai/)
@@ -1,5 +1,78 @@
 # llama-cloud-services-py

+## 0.6.94
+
+### Patch Changes
+
+- 232c55b: Include xlsx files in extract input
+
+## 0.6.93
+
+### Patch Changes
+
+- da1916c: Add more warnings
+
+## 0.6.92
+
+### Patch Changes
+
+- 2358df1: add deprecation notices
+
+## 0.6.91
+
+### Patch Changes
+
+- 07ec282: Bump up patch versions for python packages
+- 3040951: Use error description in ExtractedData invalid extraction error
+
+## 0.6.90
+
+### Patch Changes
+
+- 19cbb25: Remove extension filter
+
+## 0.6.89
+
+### Patch Changes
+
+- b9b83c9: Parse bounding boxes from extract jobs results in agent data
+
+## 0.6.88
+
+### Patch Changes
+
+- 71db318: Add tier and version
+
+## 0.6.87
+
+### Patch Changes
+
+- 06c3c55: Update spreadsheet parsing config
+
+## 0.6.86
+
+### Patch Changes
+
+- 1b7198d: Update extract to have confidence scores available in all modes
+
+## 0.6.85
+
+### Patch Changes
+
+- ae30990: Add line-level bbox support
+
+## 0.6.84
+
+### Patch Changes
+
+- 0a110de: Release to re-align versions
+
+## 0.6.83
+
+### Patch Changes
+
+- ca78113: Do not use presigned URLs by default in files client
+
 ## 0.6.82

 ### Patch Changes
@@ -4,6 +4,16 @@

 # Llama Cloud Services

+> **⚠️ DEPRECATION NOTICE**
+>
+> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
+>
+> **Please migrate to the new packages:**
+> - **Python**: `pip install "llama-cloud>=1.0"` ([GitHub](https://github.com/run-llama/llama-cloud-py))
+> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
+>
+> The new packages provide the same functionality with improved performance, better support, and active development.
+
 This repository contains the code for hand-written SDKs and clients for interacting with LlamaCloud.

 This includes:
@@ -1,3 +1,5 @@
+import warnings
+
 from llama_cloud_services.parse import LlamaParse
 from llama_cloud_services.extract import LlamaExtract, ExtractionAgent
 from llama_cloud_services.utils import SourceText, FileInput
@@ -8,6 +10,16 @@ from llama_cloud_services.index import (
    LlamaCloudRetriever,
 )

+# Emit a DeprecationWarning when the package is imported. The requirement in the
+# install hint is quoted so that copying it into a shell does not treat ">=1.0"
+# as an output redirection.
+warnings.warn(
+    "This package (llama-cloud-services) is deprecated and will be maintained until May 1, 2026. "
+    "Please migrate to the new package: pip install \"llama-cloud>=1.0\" "
+    "(https://github.com/run-llama/llama-cloud-py). "
+    "The new package provides the same functionality with improved performance and support.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 __all__ = [
    "LlamaParse",
    "LlamaExtract",
@@ -11,6 +11,9 @@ from .schema import (
    InvalidExtractionData,
    ExtractedFieldMetadata,
    ExtractedFieldMetaDataDict,
+    FieldCitation,
+    BoundingBox,
+    PageDimensions,
 )
 from .client import AsyncAgentDataClient

@@ -28,4 +31,7 @@ __all__ = [
    "InvalidExtractionData",
    "ExtractedFieldMetadata",
    "ExtractedFieldMetaDataDict",
+    "FieldCitation",
+    "BoundingBox",
+    "PageDimensions",
 ]
@@ -174,6 +174,22 @@ class TypedAgentDataItems(BaseModel, Generic[AgentDataT]):
    )


+class BoundingBox(BaseModel):
+    """Bounding box coordinates for a citation location on a page.
+
+    The box is described by an origin point (``x``, ``y``) plus a width (``w``)
+    and a height (``h``). All four fields are required floats.
+    """
+
+    # NOTE(review): the unit of measure and which corner the origin refers to
+    # are not stated here — confirm against the extraction API.
+    x: float = Field(description="X coordinate of the bounding box origin")
+    y: float = Field(description="Y coordinate of the bounding box origin")
+    w: float = Field(description="Width of the bounding box")
+    h: float = Field(description="Height of the bounding box")
+
+
+class PageDimensions(BaseModel):
+    """Dimensions of a page in the source document.
+
+    Both fields are required floats.
+    """
+
+    # NOTE(review): units are not stated here; presumably they share the
+    # coordinate space used by BoundingBox — confirm.
+    width: float = Field(description="Width of the page")
+    height: float = Field(description="Height of the page")
+
+
 class FieldCitation(BaseModel):
    page: Optional[int] = Field(
        None, description="The page number that the field occurred on"
@@ -182,6 +198,14 @@ class FieldCitation(BaseModel):
        None,
        description="The original text this field's value was derived from",
    )
+    bounding_boxes: Optional[List[BoundingBox]] = Field(
+        None,
+        description="Bounding boxes indicating where the citation appears on the page",
+    )
+    page_dimensions: Optional[PageDimensions] = Field(
+        None,
+        description="Dimensions of the page containing the citation",
+    )


 class ExtractedFieldMetadata(BaseModel):
@@ -201,6 +225,10 @@ class ExtractedFieldMetadata(BaseModel):
        None,
        description="The confidence score for the field based on the extracted text only",
    )
+    parsing_confidence: Optional[float] = Field(
+        None,
+        description="The confidence score for the field based on the parsing/OCR quality",
+    )
    citation: Optional[List[FieldCitation]] = Field(
        None,
        description="The citation for the field, including page number and matching text",
@@ -447,26 +475,49 @@ class ExtractedData(BaseModel, Generic[ExtractedT]):
                },
            )
        except ValidationError as e:
+            # Capture the job-level error from the extraction run if available
+            job_error = result.error
+
            invalid_item = ExtractedData[Dict[str, Any]].create(
                data=result.data or {},
                status="error",
                field_metadata=field_metadata,
-                metadata={"extraction_error": str(e), **(metadata or {})},
+                metadata={
+                    "extraction_error": str(e),
+                    **({"job_error": job_error} if job_error else {}),
+                    **(metadata or {}),
+                },
                file_id=file_id,
                file_name=file_name,
                file_hash=file_hash,
            )
-            raise InvalidExtractionData(invalid_item) from e
+            raise InvalidExtractionData(invalid_item, extraction_error=job_error) from e


 class InvalidExtractionData(Exception):
    """
    Exception raised when the extracted data does not conform to the schema.
+
+    Attributes:
+        invalid_item: The ExtractedData instance containing the invalid data and metadata
+        extraction_error: The error message from the extraction job, if available
    """

-    def __init__(self, invalid_item: ExtractedData[Dict[str, Any]]):
+    def __init__(
+        self,
+        invalid_item: ExtractedData[Dict[str, Any]],
+        extraction_error: Optional[str] = None,
+    ):
        self.invalid_item = invalid_item
-        super().__init__("Not able to parse the extracted data, parsed invalid format")
+        self.extraction_error = extraction_error
+
+        # Prefer the job-level error text when one was given; otherwise fall back to the generic message
+        if extraction_error:
+            message = f"Extraction error: {extraction_error}"
+        else:
+            message = "Not able to parse the extracted data, parsed invalid format"
+
+        super().__init__(message)


 def calculate_overall_confidence(
@@ -2,7 +2,7 @@ import asyncio
 import io
 import os
 import time
-from typing import TYPE_CHECKING
+from typing import Any, Dict, TYPE_CHECKING

 import httpx
 from llama_cloud.client import AsyncLlamaCloud
@@ -68,6 +68,8 @@ class LlamaSheets:
        max_timeout: int = 300,
        poll_interval: int = 5,
        max_retries: int = 3,
+        project_id: str | None = None,
+        organization_id: str | None = None,
        async_httpx_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Initialize the LlamaSheets client.
@@ -78,6 +80,8 @@ class LlamaSheets:
            max_timeout: Maximum time to wait for job completion in seconds
            poll_interval: Interval between status checks in seconds
            max_retries: Maximum number of retries for failed requests
+            project_id: Project ID for file operations. If not provided, will use LLAMA_CLOUD_PROJECT_ID env var
+            organization_id: Organization ID for file operations. If not provided, will use LLAMA_CLOUD_ORGANIZATION_ID env var
            async_httpx_client: Optional custom async httpx client
        """
        self.api_key = api_key or os.environ.get("LLAMA_CLOUD_API_KEY")
@@ -93,15 +97,32 @@ class LlamaSheets:
        self.poll_interval = poll_interval
        self.max_retries = max_retries

+        self.project_id = project_id or os.environ.get("LLAMA_CLOUD_PROJECT_ID")
+        self.organization_id = organization_id or os.environ.get(
+            "LLAMA_CLOUD_ORGANIZATION_ID"
+        )
+
        self._async_client: httpx.AsyncClient | None = async_httpx_client
        self._files_client = FileClient(
            AsyncLlamaCloud(
                token=self.api_key,
                base_url=self.base_url,
                httpx_client=async_httpx_client,
-            )
+            ),
+            project_id=self.project_id,
+            organization_id=self.organization_id,
        )

+    def _get_default_params(self) -> dict[str, str]:
+        """Build the default query parameters for API requests.
+
+        Only the identifiers that are set on this client (not ``None``) are
+        included, keyed as ``project_id`` and ``organization_id``.
+        """
+        candidates = (
+            ("project_id", self.project_id),
+            ("organization_id", self.organization_id),
+        )
+        return {key: value for key, value in candidates if value is not None}
+
    def _get_async_client(self) -> httpx.AsyncClient:
        """Get or create the async httpx client"""
        if self._async_client is None:
@@ -306,6 +327,8 @@ class LlamaSheets:
            "config": config.model_dump(mode="json", exclude_none=True),
        }

+        params = self._get_default_params()
+
        try:
            async for attempt in AsyncRetrying(
                stop=stop_after_attempt(self.max_retries),
@@ -318,6 +341,7 @@ class LlamaSheets:
                    response = await client.post(
                        f"{self.base_url}/api/v1/beta/sheets/jobs",
                        headers=self._get_headers(),
+                        params=params,
                        json=payload,
                    )
                    response.raise_for_status()
@@ -347,12 +371,17 @@ class LlamaSheets:
            ):
                with attempt:
                    client = self._get_async_client()
+                    params: Dict[str, Any] = {
+                        "include_results": include_results_metadata,
+                        **self._get_default_params(),
+                    }
                    response = await client.get(
                        f"{self.base_url}/api/v1/beta/sheets/jobs/{job_id}",
                        headers=self._get_headers(),
-                        params={"include_results": include_results_metadata},
+                        params=params,
                    )
                    response.raise_for_status()
+
                    return SpreadsheetJobResult.model_validate(response.json())
        except Exception as e:
            raise SpreadsheetAPIError(f"Failed to get job status: {e}") from e
@@ -415,6 +444,8 @@ class LlamaSheets:
        # Get presigned URL
        presigned_response = None
        result_type_str = str(result_type)
+        params = self._get_default_params()
+
        try:
            async for attempt in AsyncRetrying(
                stop=stop_after_attempt(self.max_retries),
@@ -427,6 +458,7 @@ class LlamaSheets:
                    response = await client.get(
                        f"{self.base_url}/api/v1/beta/sheets/jobs/{job_id}/regions/{region_id}/result/{result_type_str}",
                        headers=self._get_headers(),
+                        params=params,
                    )
                    response.raise_for_status()
                    presigned_response = PresignedUrlResponse.model_validate(
@@ -1,5 +1,6 @@
 from datetime import datetime
 from enum import Enum
+from typing import Literal

 from pydantic import BaseModel, ConfigDict, Field, field_validator

@@ -63,7 +64,7 @@ class SpreadsheetParseResult(BaseModel):
 class SpreadsheetParsingConfig(BaseModel):
    """Configuration for spreadsheet parsing and region extraction"""

-    model_config = ConfigDict(extra="forbid")
+    model_config = ConfigDict(extra="ignore")

    sheet_names: list[str] | None = Field(
        default=None,
@@ -86,6 +87,16 @@ class SpreadsheetParsingConfig(BaseModel):
        description="Enables experimental processing. Accuracy may be impacted.",
    )

+    flatten_hierarchical_tables: bool = Field(
+        default=False,
+        description="Return a flattened dataframe when a detected table is recognized as hierarchical.",
+    )
+
+    table_merge_sensitivity: Literal["strong", "weak"] = Field(
+        default="strong",
+        description="Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).",
+    )
+

 class SpreadsheetJob(BaseModel):
    """A spreadsheet parsing job"""
@@ -4,10 +4,11 @@ import os
 import time
 from io import BufferedIOBase, TextIOWrapper
 from pathlib import Path
-from typing import List, Optional, Type, Union, Coroutine, Any, TypeVar
+from typing import Callable, List, Optional, Type, Union, Coroutine, Any, TypeVar
 import warnings
 import httpx
 from pydantic import BaseModel
+from functools import wraps
 from tenacity import (
    retry_if_exception,
    stop_after_attempt,
@@ -54,7 +55,7 @@ DEFAULT_EXTRACT_CONFIG = ExtractConfig(
 def _is_retryable_error(exception: BaseException) -> bool:
    """Check if an exception is retryable."""
    if isinstance(exception, ApiError):
-        return exception.status_code in (502, 503, 504, 425, 408)
+        return exception.status_code in (429, 500, 502, 503, 504, 425, 408)
    elif isinstance(
        exception, (httpx.HTTPStatusError, httpx.RequestError, httpx.TimeoutException)
    ):
@@ -62,6 +63,33 @@ def _is_retryable_error(exception: BaseException) -> bool:
    return False


+def _async_retry(
+    max_attempts: int = 5,
+    initial_wait: float = 1,
+    max_wait: float = 30,
+    jitter: float = 3,
+) -> Callable:
+    """Decorator for async functions with retry logic for rate limiting and transient errors.
+
+    Args:
+        max_attempts: Passed to ``stop_after_attempt``; the number of attempts
+            made before giving up.
+        initial_wait: ``initial`` argument of ``wait_exponential_jitter``.
+        max_wait: ``max`` argument of ``wait_exponential_jitter``.
+        jitter: ``jitter`` argument of ``wait_exponential_jitter``.
+
+    Returns:
+        A decorator producing an async wrapper that re-invokes the wrapped
+        coroutine function when the raised exception satisfies
+        ``_is_retryable_error``. Because ``reraise=True`` is set, the original
+        exception is re-raised once the attempts are exhausted.
+    """
+
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        async def wrapper(*args: Any, **kwargs: Any) -> Any:
+            # A new AsyncRetrying controller is created on every call of the wrapper.
+            async for attempt in AsyncRetrying(
+                retry=retry_if_exception(_is_retryable_error),
+                stop=stop_after_attempt(max_attempts),
+                wait=wait_exponential_jitter(
+                    initial=initial_wait, max=max_wait, jitter=jitter
+                ),
+                reraise=True,
+            ):
+                with attempt:
+                    # Returning from inside the attempt context ends the loop on success.
+                    return await func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
 async def _validate_schema(
    client: AsyncLlamaCloud, data_schema: SchemaInput
 ) -> JSONObjectType:
@@ -82,50 +110,6 @@ async def _validate_schema(
    return validated_schema.data_schema


-async def _get_job_with_retry(
-    client: AsyncLlamaCloud,
-    job_id: str,
-    max_attempts: int = 5,
-    initial_wait: float = 1,
-    max_wait: float = 60,
-    jitter: float = 5,
-) -> ExtractJob:
-    """Get extraction job with retry logic."""
-    async for attempt in AsyncRetrying(
-        retry=retry_if_exception(_is_retryable_error),
-        stop=stop_after_attempt(max_attempts),
-        wait=wait_exponential_jitter(initial=initial_wait, max=max_wait, jitter=jitter),
-        reraise=True,
-    ):
-        with attempt:
-            return await client.llama_extract.get_job(job_id=job_id)
-
-
-async def _get_run_with_retry(
-    client: AsyncLlamaCloud,
-    job_id: str,
-    project_id: Optional[str] = None,
-    organization_id: Optional[str] = None,
-    max_attempts: int = 3,
-    initial_wait: float = 1,
-    max_wait: float = 20,
-    jitter: float = 3,
-) -> ExtractRun:
-    """Get extraction run with retry logic."""
-    async for attempt in AsyncRetrying(
-        retry=retry_if_exception(_is_retryable_error),
-        stop=stop_after_attempt(max_attempts),
-        wait=wait_exponential_jitter(initial=initial_wait, max=max_wait, jitter=jitter),
-        reraise=True,
-    ):
-        with attempt:
-            return await client.llama_extract.get_run_by_job_id(
-                job_id=job_id,
-                project_id=project_id,
-                organization_id=organization_id,
-            )
-
-
 async def _wait_for_job_result(
    client: AsyncLlamaCloud,
    job_id: str,
@@ -142,30 +126,33 @@ async def _wait_for_job_result(
    run_jitter: float = 3,
 ) -> Optional[ExtractRun]:
    """Wait for and return the results of an extraction job."""
+
+    @_async_retry(
+        max_attempts=job_retry_attempts, max_wait=job_max_wait, jitter=job_jitter
+    )
+    async def _get_job() -> ExtractJob:
+        return await client.llama_extract.get_job(job_id=job_id)
+
+    @_async_retry(
+        max_attempts=run_retry_attempts, max_wait=run_max_wait, jitter=run_jitter
+    )
+    async def _get_run() -> ExtractRun:
+        return await client.llama_extract.get_run_by_job_id(
+            job_id=job_id,
+            project_id=project_id,
+            organization_id=organization_id,
+        )
+
    start = time.perf_counter()
    poll_count = 0

    while True:
        await asyncio.sleep(check_interval)
        poll_count += 1
-        job = await _get_job_with_retry(
-            client,
-            job_id,
-            max_attempts=job_retry_attempts,
-            max_wait=job_max_wait,
-            jitter=job_jitter,
-        )
+        job = await _get_job()

        if job.status == StatusEnum.SUCCESS:
-            return await _get_run_with_retry(
-                client,
-                job_id,
-                project_id,
-                organization_id,
-                max_attempts=run_retry_attempts,
-                max_wait=run_max_wait,
-                jitter=run_jitter,
-            )
+            return await _get_run()
        elif job.status == StatusEnum.PENDING:
            end = time.perf_counter()
            if end - start > max_timeout:
@@ -177,15 +164,7 @@ async def _wait_for_job_result(
            warnings.warn(
                f"Failure in job: {job_id}, status: {job.status}, error: {job.error}"
            )
-            return await _get_run_with_retry(
-                client,
-                job_id,
-                project_id,
-                organization_id,
-                max_attempts=run_retry_attempts,
-                max_wait=run_max_wait,
-                jitter=run_jitter,
-            )
+            return await _get_run()


 def run_in_thread(
@@ -240,11 +219,6 @@ def _extraction_config_warning(config: ExtractConfig) -> None:
            raise ValueError(
                "`cite_sources` is only supported with MULTIMODAL or PREMIUM extraction modes."
            )
-    if config.confidence_scores:
-        if config.extraction_mode in (ExtractMode.FAST, ExtractMode.BALANCED):
-            raise ValueError(
-                "`confidence_scores` is only supported with MULTIMODAL or PREMIUM extraction modes."
-            )


 class ExtractionAgent:
@@ -503,9 +477,12 @@ class ExtractionAgent:
        Args:
            run_id (str): The ID of the extraction run to delete
        """
-        self._run_in_thread(
-            self._client.llama_extract.delete_extraction_run(run_id=run_id)
-        )
+
+        @_async_retry()
+        async def _delete() -> None:
+            return await self._client.llama_extract.delete_extraction_run(run_id=run_id)
+
+        self._run_in_thread(_delete())

    def list_extraction_runs(
        self, page: int = 0, limit: int = 100
@@ -515,13 +492,16 @@ class ExtractionAgent:
        Returns:
            PaginatedExtractRunsResponse: Paginated list of extraction runs
        """
-        return self._run_in_thread(
-            self._client.llama_extract.list_extract_runs(
+
+        @_async_retry()
+        async def _list() -> PaginatedExtractRunsResponse:
+            return await self._client.llama_extract.list_extract_runs(
                extraction_agent_id=self.id,
                skip=page * limit,
                limit=limit,
            )
-        )
+
+        return self._run_in_thread(_list())

    def __repr__(self) -> str:
        return f"ExtractionAgent(id={self.id}, name={self.name})"
@@ -663,15 +643,17 @@ class LlamaExtract(BaseComponent):
                "data_schema must be either a dictionary or a Pydantic model"
            )

-        agent = self._run_in_thread(
-            self._async_client.llama_extract.create_extraction_agent(
+        @_async_retry()
+        async def _create() -> CloudExtractAgent:
+            return await self._async_client.llama_extract.create_extraction_agent(
                project_id=self._project_id,
                organization_id=self._organization_id,
                name=name,
                data_schema=data_schema,
                config=config,
            )
-        )
+
+        agent = self._run_in_thread(_create())

        return ExtractionAgent(
            client=self._async_client,
@@ -707,19 +689,27 @@ class LlamaExtract(BaseComponent):
            )

        if id:
-            agent = self._run_in_thread(
-                self._async_client.llama_extract.get_extraction_agent(
+
+            @_async_retry()
+            async def _get_by_id() -> CloudExtractAgent:
+                return await self._async_client.llama_extract.get_extraction_agent(
                    extraction_agent_id=id,
                )
-            )
+
+            agent = self._run_in_thread(_get_by_id())

        elif name:
-            agent = self._run_in_thread(
-                self._async_client.llama_extract.get_extraction_agent_by_name(
-                    name=name,
-                    project_id=self._project_id,
+
+            @_async_retry()
+            async def _get_by_name() -> CloudExtractAgent:
+                return (
+                    await self._async_client.llama_extract.get_extraction_agent_by_name(
+                        name=name,
+                        project_id=self._project_id,
+                    )
                )
-            )
+
+            agent = self._run_in_thread(_get_by_name())
        else:
            raise ValueError("Either name or extraction_agent_id must be provided.")

@@ -739,11 +729,14 @@ class LlamaExtract(BaseComponent):

    def list_agents(self) -> List[ExtractionAgent]:
        """List all available extraction agents."""
-        agents = self._run_in_thread(
-            self._async_client.llama_extract.list_extraction_agents(
+
+        @_async_retry()
+        async def _list() -> List[CloudExtractAgent]:
+            return await self._async_client.llama_extract.list_extraction_agents(
                project_id=self._project_id,
            )
-        )
+
+        agents = self._run_in_thread(_list())

        return [
            ExtractionAgent(
@@ -768,11 +761,14 @@ class LlamaExtract(BaseComponent):
        Args:
            agent_id (str): ID of the extraction agent to delete
        """
-        self._run_in_thread(
-            self._async_client.llama_extract.delete_extraction_agent(
-                extraction_agent_id=agent_id
+
+        @_async_retry()
+        async def _delete() -> None:
+            return await self._async_client.llama_extract.delete_extraction_agent(
+                extraction_agent_id=agent_id,
            )
-        )
+
+        self._run_in_thread(_delete())

    async def _wait_for_job_result(self, job_id: str) -> Optional[ExtractRun]:
        """Wait for and return the results of an extraction job."""
@@ -810,6 +806,7 @@ class LlamaExtract(BaseComponent):
            # Document files
            ".pdf": "application/pdf",
            ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            # Image files
            ".png": "image/png",
            ".jpg": "image/jpeg",
@@ -3,7 +3,7 @@ from typing import BinaryIO
 import os
 from pathlib import Path
 from llama_cloud.client import AsyncLlamaCloud
-from llama_cloud.types import File, FileCreate
+from llama_cloud.types import File
 from typing import Optional
 from llama_cloud_services.utils import SourceText, FileInput

@@ -11,7 +11,7 @@ from llama_cloud_services.utils import SourceText, FileInput
 class FileClient:
    """
    Higher-level client for interacting with the LlamaCloud Files API.
-    Uses presigned URLs for uploads by default.
+    Optionally uses presigned URLs for uploads.

    Args:
        client: The LlamaCloud client to use.
@@ -25,7 +25,7 @@ class FileClient:
        client: AsyncLlamaCloud,
        project_id: Optional[str] = None,
        organization_id: Optional[str] = None,
-        use_presigned_url: bool = True,
+        use_presigned_url: bool = False,
    ):
        self.client = client
        self.project_id = project_id
@@ -73,11 +73,9 @@ class FileClient:
            presigned_url = await self.client.files.generate_presigned_url(
                project_id=self.project_id,
                organization_id=self.organization_id,
-                request=FileCreate(
-                    name=name,
-                    external_file_id=external_file_id,
-                    file_size=file_size,
-                ),
+                name=name,
+                external_file_id=external_file_id,
+                file_size=file_size,
            )
            httpx_client = self.client._client_wrapper.httpx_client
            upload_response = await httpx_client.put(
@@ -91,6 +89,10 @@ class FileClient:
                organization_id=self.organization_id,
            )
        else:
+            # Set buffer.name if not already set, so the upload uses external_file_id
+            # for file type detection
+            if not getattr(buffer, "name", None):
+                setattr(buffer, "name", external_file_id)
            return await self.client.files.upload_file(
                upload_file=buffer,
                external_file_id=external_file_id,
@@ -21,7 +21,6 @@ from llama_cloud import (
    PipelineCreateTransformConfig,
    PipelineFileCreateCustomMetadataValue,
    PipelineType,
-    ProjectCreate,
    ManagedIngestionStatus,
    CloudDocumentCreate,
    CloudDocument,
@@ -507,14 +506,19 @@ class LlamaCloudIndex(BaseManagedIndex):
        client = get_client(api_key, base_url, app_url, timeout)

        if project_id is None:
-            # create project if it doesn't exist
-            project = client.projects.upsert_project(
+            # get project by name
+            projects = client.projects.list_projects(
                organization_id=organization_id,
-                request=ProjectCreate(name=project_name),
+                project_name=project_name,
            )
+            if not projects:
+                raise ValueError(
+                    f"Project '{project_name}' not found. Please create it first in the LlamaCloud UI."
+                )
+            project = projects[0]
            project_id = project.id
            if verbose:
-                print(f"Created project {project_id} with name {project_name}")
+                print(f"Found project {project_id} with name {project_name}")

        # create pipeline
        pipeline_create = PipelineCreate(
@@ -563,15 +567,20 @@ class LlamaCloudIndex(BaseManagedIndex):
        app_url = app_url or os.environ.get("LLAMA_CLOUD_APP_URL", DEFAULT_APP_URL)
        aclient = get_aclient(api_key, base_url, app_url, timeout)

-        # create project if it doesn't exist
-        project = await aclient.projects.upsert_project(
-            organization_id=organization_id, request=ProjectCreate(name=project_name)
+        # get project by name
+        projects = await aclient.projects.list_projects(
+            organization_id=organization_id, project_name=project_name
        )
+        if not projects:
+            raise ValueError(
+                f"Project '{project_name}' not found. Please create it first in the LlamaCloud UI."
+            )
+        project = projects[0]
        if project.id is None:
-            raise ValueError(f"Failed to create/get project {project_name}")
+            raise ValueError(f"Failed to get project {project_name}")

        if verbose:
-            print(f"Created project {project.id} with name {project.name}")
+            print(f"Found project {project.id} with name {project.name}")

        # create pipeline
        pipeline_create = PipelineCreate(
@@ -654,6 +663,9 @@ class LlamaCloudIndex(BaseManagedIndex):
            ],
        )

+        # Trigger a sync
+        client.pipelines.sync_pipeline(pipeline_id=index.pipeline.id)
+
        doc_ids = [doc.id for doc in upserted_documents]
        index.wait_for_completion(
            doc_ids=doc_ids, verbose=verbose, raise_on_error=raise_on_error
@@ -738,6 +750,10 @@ class LlamaCloudIndex(BaseManagedIndex):
                    )
                ],
            )
+
+            # Trigger a sync
+            self._client.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            upserted_document = upserted_documents[0]
            self.wait_for_completion(
                doc_ids=[upserted_document.id], verbose=verbose, raise_on_error=True
@@ -760,6 +776,9 @@ class LlamaCloudIndex(BaseManagedIndex):
                    )
                ],
            )
+            # Trigger a sync
+            await self._aclient.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            upserted_document = upserted_documents[0]
            await self.await_for_completion(
                doc_ids=[upserted_document.id], verbose=verbose, raise_on_error=True
@@ -782,6 +801,9 @@ class LlamaCloudIndex(BaseManagedIndex):
                    )
                ],
            )
+            # Trigger a sync
+            self._client.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            upserted_document = upserted_documents[0]
            self.wait_for_completion(
                doc_ids=[upserted_document.id], verbose=verbose, raise_on_error=True
@@ -804,6 +826,9 @@ class LlamaCloudIndex(BaseManagedIndex):
                    )
                ],
            )
+            # Trigger a sync
+            await self._aclient.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            upserted_document = upserted_documents[0]
            await self.await_for_completion(
                doc_ids=[upserted_document.id], verbose=verbose, raise_on_error=True
@@ -827,6 +852,9 @@ class LlamaCloudIndex(BaseManagedIndex):
                    for doc in documents
                ],
            )
+            # Trigger a sync
+            self._client.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            doc_ids = [doc.id for doc in upserted_documents]
            self.wait_for_completion(doc_ids=doc_ids, verbose=True, raise_on_error=True)
            return [True] * len(doc_ids)
@@ -849,6 +877,9 @@ class LlamaCloudIndex(BaseManagedIndex):
                    for doc in documents
                ],
            )
+            # Trigger a sync
+            await self._aclient.pipelines.sync_pipeline(pipeline_id=self.pipeline.id)
+
            doc_ids = [doc.id for doc in upserted_documents]
            await self.await_for_completion(
                doc_ids=doc_ids, verbose=True, raise_on_error=True
@@ -564,6 +564,17 @@ class LlamaParse(BasePydanticReader):
        default=None,
        description="Whether to extract the printed page numbers from pages in the document.",
    )
+    line_level_bounding_box: Optional[bool] = Field(
+        default=False,
+        description="If set to true, the parser will include line-level bounding boxes in the result.",
+    )
+    tier: Optional[str] = Field(
+        default=None, description="The tier to use for the parsing job."
+    )
+    version: Optional[str] = Field(
+        default=None,
+        description="The version of the parser to use at the specified tier.",
+    )

    # Deprecated
    bounding_box: Optional[str] = Field(
@@ -740,11 +751,9 @@ class LlamaParse(BasePydanticReader):
            file_path = str(file_input)
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext not in SUPPORTED_FILE_TYPES:
-                raise Exception(
-                    f"Currently, only the following file types are supported: {SUPPORTED_FILE_TYPES}\n"
-                    f"Current file type: {file_ext}"
-                )
-            mime_type = mimetypes.guess_type(file_path)[0]
+                mime_type = "application/octet-stream"
+            else:
+                mime_type = mimetypes.guess_type(file_path)[0]
            # Open the file here for the duration of the async context
            # load data, set the mime type
            fs = fs or get_default_fs()
@@ -1118,6 +1127,15 @@ class LlamaParse(BasePydanticReader):
        if self.extract_printed_page_number is not None:
            data["extract_printed_page_number"] = self.extract_printed_page_number

+        if self.line_level_bounding_box is not None:
+            data["line_level_bounding_box"] = self.line_level_bounding_box
+
+        if self.tier is not None:
+            data["tier"] = self.tier
+
+        if self.version is not None:
+            data["version"] = self.version
+
        # Deprecated
        if self.bounding_box is not None:
            data["bounding_box"] = self.bounding_box
@@ -115,6 +115,26 @@ class BBox(SafeBaseModel):
    )


+class LineLevelBboxItem(SafeBaseModel):
+    """A line-level bounding box item."""
+
+    md: Optional[str] = Field(
+        default=None, description="The markdown-formatted content of the line."
+    )
+    text: Optional[str] = Field(
+        default=None, description="The text content of the line."
+    )
+    bBox: Optional[BBox] = Field(
+        default=None, description="The bounding box of the line."
+    )
+    startIndex: Optional[int] = Field(
+        default=None, description="The start index of the line in the page text."
+    )
+    endIndex: Optional[int] = Field(
+        default=None, description="The end index of the line in the page text."
+    )
+
+
 class PageItem(SafeBaseModel):
    """An item in a page."""

@@ -138,6 +158,9 @@ class PageItem(SafeBaseModel):
        default=None,
        description="The HTML-formatted content of the item. Only applicable for table items when output_tables_as_HTML=True.",
    )
+    lines: Optional[List[LineLevelBboxItem]] = Field(
+        default=None, description="The line-level bounding box items of the item."
+    )


 class ImageItem(SafeBaseModel):
@@ -1,5 +1,95 @@
 # llama_parse

+## 0.6.94
+
+### Patch Changes
+
+- 232c55b: Include xlsx files in extract input
+- Updated dependencies [232c55b]
+  - llama-cloud-services-py@0.6.94
+
+## 0.6.93
+
+### Patch Changes
+
+- da1916c: Add more warnings
+- Updated dependencies [da1916c]
+  - llama-cloud-services-py@0.6.93
+
+## 0.6.92
+
+### Patch Changes
+
+- Updated dependencies [2358df1]
+  - llama-cloud-services-py@0.6.92
+
+## 0.6.91
+
+### Patch Changes
+
+- 07ec282: Bump up patch versions for python packages
+- Updated dependencies [07ec282]
+- Updated dependencies [3040951]
+  - llama-cloud-services-py@0.6.91
+
+## 0.6.90
+
+### Patch Changes
+
+- 19cbb25: Remove extension filter
+- Updated dependencies [19cbb25]
+  - llama-cloud-services-py@0.6.90
+
+## 0.6.89
+
+### Patch Changes
+
+- Updated dependencies [b9b83c9]
+  - llama-cloud-services-py@0.6.89
+
+## 0.6.88
+
+### Patch Changes
+
+- Updated dependencies [71db318]
+  - llama-cloud-services-py@0.6.88
+
+## 0.6.87
+
+### Patch Changes
+
+- Updated dependencies [06c3c55]
+  - llama-cloud-services-py@0.6.87
+
+## 0.6.86
+
+### Patch Changes
+
+- 1b7198d: Update extract to have confidence scores available in all modes
+- Updated dependencies [1b7198d]
+  - llama-cloud-services-py@0.6.86
+
+## 0.6.85
+
+### Patch Changes
+
+- Updated dependencies [ae30990]
+  - llama-cloud-services-py@0.6.85
+
+## 0.6.84
+
+### Patch Changes
+
+- Updated dependencies [0a110de]
+  - llama-cloud-services-py@0.6.84
+
+## 0.6.83
+
+### Patch Changes
+
+- Updated dependencies [ca78113]
+  - llama-cloud-services-py@0.6.83
+
 ## 0.6.82

 ### Patch Changes
@@ -1,5 +1,15 @@
 # LlamaParse

+> **⚠️ DEPRECATION NOTICE**
+>
+> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
+>
+> **Please migrate to the new packages:**
+> - **Python**: `pip install "llama-cloud>=1.0"` ([GitHub](https://github.com/run-llama/llama-cloud-py))
+> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
+>
+> The new packages provide the same functionality with improved performance, better support, and active development.
+
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-parse)](https://pypi.org/project/llama-parse/)
 [![GitHub contributors](https://img.shields.io/github/contributors/run-llama/llama_parse)](https://github.com/run-llama/llama_parse/graphs/contributors)
 [![Discord](https://img.shields.io/discord/1059199217496772688)](https://discord.gg/dGcwcsnxhU)
@@ -146,9 +156,9 @@ Full documentation for `SimpleDirectoryReader` can be found on the [LlamaIndex D

 Several end-to-end indexing examples can be found in the examples folder

- [Getting Started](/docs/examples-py/parse/demo_basic.ipynb)
- [Advanced RAG Example](/docs/examples-py/parse/demo_advanced.ipynb)
- [Raw API Usage](/docs/examples-py/parse/demo_api.ipynb)
+- [Getting Started](../../examples/parse/demo_basic.ipynb)
+- [Advanced RAG Example](../../examples/parse/demo_advanced.ipynb)
+- [Raw API Usage](../../examples/parse/demo_api.ipynb)

 ## Documentation

@@ -1,8 +1,18 @@
-from llama_cloud_services.parse import (
+import warnings
+from llama_cloud_services.parse import (  # type: ignore[attr-defined]
    LlamaParse,
    ResultType,
    ParsingMode,
    FailedPageMode,
 )

+warnings.warn(
+    "The 'llama-parse' package is deprecated and will no longer receive updates. "
+    "Please migrate to the new unified SDK. "
+    "See https://developers.llamaindex.ai/python/cloud/llamaparse/getting_started/ "
+    "and https://github.com/run-llama/llama-cloud-py/blob/main/README.md for migration instructions.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 __all__ = ["LlamaParse", "ResultType", "ParsingMode", "FailedPageMode"]
@@ -1,6 +1,6 @@
 {
  "name": "llama_parse",
-  "version": "0.6.82",
+  "version": "0.6.94",
  "description": "",
  "main": "index.js",
  "private": false,
@@ -11,13 +11,13 @@ dev = [

 [project]
 name = "llama-parse"
-version = "0.6.83"
+version = "0.6.94"
 description = "Parse files into RAG-Optimized formats."
 authors = [{name = "Logan Markewich", email = "logan@llamaindex.ai"}]
 requires-python = ">=3.9,<4.0"
 readme = "README.md"
 license = "MIT"
-dependencies = ["llama-cloud-services>=0.6.82"]
+dependencies = ["llama-cloud-services>=0.6.94"]

 [project.scripts]
 llama-parse = "llama_parse.cli.main:parse"
@@ -1,6 +1,6 @@
 {
  "name": "llama-cloud-services-py",
-  "version": "0.6.82",
+  "version": "0.6.94",
  "private": false,
  "license": "MIT",
  "scripts": {},
@@ -23,7 +23,7 @@ dev = [

 [project]
 name = "llama-cloud-services"
-version = "0.6.83"
+version = "0.6.94"
 description = "Tailored SDK clients for LlamaCloud services."
 authors = [{name = "Logan Markewich", email = "logan@runllama.ai"}]
 requires-python = ">=3.9,<4.0"
@@ -31,7 +31,7 @@ readme = "README.md"
 license = "MIT"
 dependencies = [
  "llama-index-core>=0.12.0",
-  "llama-cloud==0.1.44",
+  "llama-cloud==0.1.46",
  "pydantic>=2.8,!=2.10",
  "click>=8.1.7,<9",
  "python-dotenv>=1.0.1,<2",
@@ -2,24 +2,58 @@ import os
 import tempfile
 import pytest
 import pandas as pd
+from pydantic import ValidationError

 from llama_cloud_services.beta.sheets import LlamaSheets
 from llama_cloud_services.beta.sheets.types import SpreadsheetParsingConfig


+class TestSpreadsheetParsingConfig:
+    """Unit tests for SpreadsheetParsingConfig."""
+
+    def test_default_values(self):
+        """Test that default values are set correctly."""
+        config = SpreadsheetParsingConfig()
+        assert config.flatten_hierarchical_tables is False
+        assert config.table_merge_sensitivity == "strong"
+
+    def test_custom_values(self):
+        """Test setting custom values for new fields."""
+        config = SpreadsheetParsingConfig(
+            flatten_hierarchical_tables=True,
+            table_merge_sensitivity="weak",
+        )
+        assert config.flatten_hierarchical_tables is True
+        assert config.table_merge_sensitivity == "weak"
+
+    def test_table_merge_sensitivity_validation(self):
+        """Test that invalid table_merge_sensitivity values are rejected."""
+        with pytest.raises(ValidationError):
+            SpreadsheetParsingConfig(table_merge_sensitivity="invalid")
+
+    def test_unknown_fields_ignored(self):
+        """Test that unknown fields are silently ignored."""
+        config = SpreadsheetParsingConfig(
+            unknown_field="test",
+            another_unknown=123,
+        )
+        assert not hasattr(config, "unknown_field")
+        assert not hasattr(config, "another_unknown")
+
+
@pytest.fixture
 def sheets_client():
    """Create a LlamaSheets client for testing."""
-    api_key = os.getenv(
-        "LLAMA_CLOUD_API_KEY", "llx-3AEorIw5v0lnJPzEOI9xSl0N8yFx3fguw0Zn8QJHzGWmwg5r"
-    )
-    base_url = os.getenv("LLAMA_CLOUD_BASE_URL", "https://api.staging.llamaindex.ai")
+    api_key = os.getenv("LLAMA_CLOUD_API_KEY")
+    base_url = os.getenv("LLAMA_CLOUD_BASE_URL", "https://api.cloud.llamaindex.ai")
+    project_id = os.getenv("LLAMA_CLOUD_PROJECT_ID")

    client = LlamaSheets(
        api_key=api_key,
        base_url=base_url,
        max_timeout=300,
        poll_interval=2,
+        project_id=project_id,
    )
    return client

@@ -51,10 +85,7 @@ def sample_excel_file():


@pytest.mark.skipif(
-    os.environ.get(
-        "LLAMA_CLOUD_API_KEY", "llx-3AEorIw5v0lnJPzEOI9xSl0N8yFx3fguw0Zn8QJHzGWmwg5r"
-    )
-    == "",
+    os.environ.get("LLAMA_CLOUD_API_KEY", "") == "",
    reason="LLAMA_CLOUD_API_KEY not set",
 )
@pytest.mark.asyncio
@@ -134,10 +165,7 @@ async def test_spreadsheet_extraction_e2e(


@pytest.mark.skipif(
-    os.environ.get(
-        "LLAMA_CLOUD_API_KEY", "llx-3AEorIw5v0lnJPzEOI9xSl0N8yFx3fguw0Zn8QJHzGWmwg5r"
-    )
-    == "",
+    os.environ.get("LLAMA_CLOUD_API_KEY", "") == "",
    reason="LLAMA_CLOUD_API_KEY not set",
 )
@pytest.mark.asyncio
@@ -1,47 +1,40 @@
-import os
-from typing import List
-from llama_cloud_services.extract import LlamaExtract
+from typing import Any, Dict, Optional, Union

-# Global storage for agents to cleanup
-_TEST_AGENTS_TO_CLEANUP: List[str] = []
+from llama_cloud.core.api_error import ApiError
+from llama_cloud.types import ExtractConfig
+from pydantic import BaseModel
+from tenacity import (
+    retry,
+    retry_if_exception,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from llama_cloud_services.extract import ExtractionAgent, LlamaExtract
+
+
+def _is_rate_limit_error(exception: BaseException) -> bool:
+    """Check if the exception is a rate limit error (429)."""
+    return isinstance(exception, ApiError) and exception.status_code == 429
+
+
+@retry(
+    retry=retry_if_exception(_is_rate_limit_error),
+    wait=wait_exponential(multiplier=1, min=1, max=30),
+    stop=stop_after_attempt(5),
+    reraise=True,
+)
+def create_agent_with_retry(
+    extractor: LlamaExtract,
+    name: str,
+    data_schema: Union[Dict[str, Any], type[BaseModel]],
+    config: Optional[ExtractConfig] = None,
+) -> ExtractionAgent:
+    """Create an extraction agent, retrying when the API reports rate limiting.
+
+    Only failures accepted by ``_is_rate_limit_error`` (an ``ApiError`` with
+    status code 429) are retried: at most 5 attempts, with exponential backoff
+    bounded to 1-30 seconds between them. Any other exception propagates
+    immediately, and ``reraise=True`` surfaces the original exception once the
+    attempts are exhausted.
+
+    Args:
+        extractor: Client used to create the agent.
+        name: Name passed to ``create_agent``.
+        data_schema: Schema as a dict or a pydantic model class.
+        config: Optional extraction config forwarded to ``create_agent``.
+
+    Returns:
+        The agent returned by ``extractor.create_agent``.
+    """
+    return extractor.create_agent(name=name, data_schema=data_schema, config=config)


 def pytest_configure(config):
    """Register custom markers for extract tests."""
    config.addinivalue_line("markers", "agent_name: custom agent name for test")
    config.addinivalue_line("markers", "agent_schema: custom agent schema for test")
-
-
-def pytest_sessionfinish(session, exitstatus):
-    """Hook that runs after all tests complete - cleanup agents here"""
-    print(
-        f"pytest_sessionfinish hook called! Agents to cleanup: {_TEST_AGENTS_TO_CLEANUP}"
-    )
-
-    if _TEST_AGENTS_TO_CLEANUP:
-        print("Creating cleanup client...")
-        # Create a fresh client just for cleanup
-        cleanup_client = LlamaExtract(
-            api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
-            base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
-            project_id=os.getenv("LLAMA_CLOUD_PROJECT_ID"),
-            verbose=True,
-        )
-
-        for agent_id in _TEST_AGENTS_TO_CLEANUP:
-            try:
-                print(f"Deleting agent {agent_id}...")
-                cleanup_client.delete_agent(agent_id)
-                print(f"Cleaned up agent {agent_id}")
-            except Exception as e:
-                print(f"Warning: Failed to delete agent {agent_id}: {e}")
-
-        _TEST_AGENTS_TO_CLEANUP.clear()
-        print("Agent cleanup completed")
-    else:
-        print("No agents to cleanup")
-
-
-def register_agent_for_cleanup(agent_id: str):
-    """Register an agent ID for cleanup at the end of the test session"""
-    _TEST_AGENTS_TO_CLEANUP.append(agent_id)
@@ -1,4 +1,6 @@
 import os
+import shutil
+import uuid
 import pytest
 from pathlib import Path
 from pydantic import BaseModel
@@ -6,7 +8,7 @@ from pydantic import BaseModel
 from llama_cloud_services.extract import LlamaExtract, ExtractionAgent, SourceText
 from llama_cloud.types import ExtractConfig, ExtractMode, ExtractRun
 from tests.extract.util import load_test_dotenv
-from .conftest import register_agent_for_cleanup
+from .conftest import create_agent_with_retry

 load_test_dotenv()

@@ -59,17 +61,27 @@ def test_schema_dict():


@pytest.fixture
-def test_agent(llama_extract, test_agent_name, test_schema_dict, request):
-    """Creates a test agent and collects it for cleanup at the end of all tests"""
-    test_id = request.node.nodeid
-    test_hash = hex(hash(test_id))[-8:]
-    base_name = test_agent_name
+def unique_test_pdf(tmp_path):
+    """Copy test PDF to a unique path to avoid file deduplication across parallel tests.

+    Uses a UUID in the filename so that external_file_id is unique regardless of
+    whether the full path or just the filename is sent to the backend.
+    """
+    unique_name = f"{TEST_PDF.stem}-{uuid.uuid4().hex[:8]}{TEST_PDF.suffix}"
+    unique_pdf = tmp_path / unique_name
+    shutil.copy2(TEST_PDF, unique_pdf)
+    return unique_pdf
+
+
+@pytest.fixture
+def test_agent(llama_extract, test_agent_name, test_schema_dict, request):
+    """Creates a test agent with a unique name and cleans it up after the test."""
+    unique_id = uuid.uuid4().hex[:8]
    base_name = next(
        (marker.args[0] for marker in request.node.iter_markers("agent_name")),
-        base_name,
+        test_agent_name,
    )
-    name = f"{base_name}_{test_hash}"
+    name = f"{base_name}_{unique_id}"

    schema = next(
        (
@@ -79,21 +91,20 @@ def test_agent(llama_extract, test_agent_name, test_schema_dict, request):
        test_schema_dict,
    )

-    # Cleanup existing agent
-    try:
-        for agent in llama_extract.list_agents():
-            if agent.name == name:
-                llama_extract.delete_agent(agent.id)
-    except Exception as e:
-        print(f"Warning: Failed to cleanup existing agent: {e}")
-
-    agent = llama_extract.create_agent(name=name, data_schema=schema)
-
-    # Add agent to cleanup list via conftest helper
-    register_agent_for_cleanup(agent.id)
+    # Use config with cache invalidation to ensure fresh results in tests
+    config = ExtractConfig(invalidate_cache=True)
+    agent = create_agent_with_retry(
+        llama_extract, name=name, data_schema=schema, config=config
+    )

    yield agent

+    # Inline cleanup -- each worker cleans up its own agents
+    try:
+        llama_extract.delete_agent(agent.id)
+    except Exception as e:
+        print(f"Warning: Failed to cleanup agent {agent.id}: {e}")
+

 class TestLlamaExtract:
    def test_init_without_api_key(self):
@@ -134,34 +145,38 @@ class TestLlamaExtract:

 class TestExtractionAgent:
    @pytest.mark.asyncio
-    async def test_extract_single_file(self, test_agent):
-        result = await test_agent.aextract(TEST_PDF)
+    async def test_extract_single_file(self, test_agent, unique_test_pdf):
+        result = await test_agent.aextract(unique_test_pdf)
        assert result.status == "SUCCESS"
        assert result.data is not None
        assert isinstance(result.data, dict)
        assert "title" in result.data
        assert "summary" in result.data

-    def test_sync_extract_single_file(self, test_agent):
-        result = test_agent.extract(TEST_PDF)
+    def test_sync_extract_single_file(self, test_agent, unique_test_pdf):
+        result = test_agent.extract(unique_test_pdf)
        assert result.status == "SUCCESS"
        assert result.data is not None
        assert isinstance(result.data, dict)
        assert "title" in result.data
        assert "summary" in result.data

-    def test_extract_file_from_buffered_io(self, test_agent):
-        result = test_agent.extract(SourceText(file=open(TEST_PDF, "rb")))
+    def test_extract_file_from_buffered_io(self, test_agent, unique_test_pdf):
+        # Open the PDF in a context manager so the handle is closed as soon as
+        # the synchronous extract call returns, instead of leaking until GC.
+        with open(unique_test_pdf, "rb") as pdf_file:
+            result = test_agent.extract(
+                SourceText(file=pdf_file, filename=unique_test_pdf.name)
+            )
        assert result.status == "SUCCESS"
        assert result.data is not None
        assert isinstance(result.data, dict)
        assert "title" in result.data
        assert "summary" in result.data

-    def test_extract_file_from_bytes(self, test_agent):
-        with open(TEST_PDF, "rb") as f:
+    def test_extract_file_from_bytes(self, test_agent, unique_test_pdf):
+        with open(unique_test_pdf, "rb") as f:
            file_bytes = f.read()
-        result = test_agent.extract(SourceText(file=file_bytes, filename=TEST_PDF.name))
+        result = test_agent.extract(
+            SourceText(file=file_bytes, filename=unique_test_pdf.name)
+        )
        assert result.status == "SUCCESS"
        assert result.data is not None
        assert isinstance(result.data, dict)
@@ -177,7 +192,10 @@ class TestExtractionAgent:
        weight for 8 to 13 km (5–8 miles).[3] The name llama (also historically spelled
        "glama") was adopted by European settlers from native Peruvians.
        """
-        result = test_agent.extract(SourceText(text_content=TEST_TEXT))
+        unique_name = f"text-{uuid.uuid4().hex[:8]}.txt"
+        result = test_agent.extract(
+            SourceText(text_content=TEST_TEXT, filename=unique_name)
+        )
        assert result.status == "SUCCESS"
        assert result.data is not None
        assert isinstance(result.data, dict)
@@ -185,8 +203,8 @@ class TestExtractionAgent:
        assert "summary" in result.data

    @pytest.mark.asyncio
-    async def test_extract_multiple_files(self, test_agent):
-        files = [TEST_PDF, TEST_PDF]  # Using same file twice for testing
+    async def test_extract_multiple_files(self, test_agent, unique_test_pdf):
+        files = [unique_test_pdf, unique_test_pdf]  # Using same file twice for testing
        response = await test_agent.aextract(files)

        assert len(response) == 2
@@ -215,15 +233,15 @@ class TestExtractionAgent:
        updated_agent = llama_extract.get_agent(name=test_agent.name)
        assert "new_field" in updated_agent.data_schema["properties"]

-    def test_list_extraction_runs(self, test_agent: ExtractionAgent):
+    def test_list_extraction_runs(self, test_agent: ExtractionAgent, unique_test_pdf):
        assert test_agent.list_extraction_runs().total == 0
-        test_agent.extract(TEST_PDF)
+        test_agent.extract(unique_test_pdf)
        runs = test_agent.list_extraction_runs()
        assert runs.total > 0

-    def test_delete_extraction_run(self, test_agent: ExtractionAgent):
+    def test_delete_extraction_run(self, test_agent: ExtractionAgent, unique_test_pdf):
        assert test_agent.list_extraction_runs().total == 0
-        run: ExtractRun = test_agent.extract(TEST_PDF)
+        run: ExtractRun = test_agent.extract(unique_test_pdf)
        test_agent.delete_extraction_run(run.id)
        runs = test_agent.list_extraction_runs()
        assert runs.total == 0
@@ -237,7 +255,7 @@ class TestStatelessExtraction:

    @pytest.fixture
    def test_config(self):
-        return ExtractConfig(extraction_mode=ExtractMode.FAST)
+        return ExtractConfig(extraction_mode=ExtractMode.FAST, invalidate_cache=True)

    @pytest.fixture
    def test_schema_dict(self):
@@ -1,14 +1,16 @@
 import os
+from pathlib import Path
 import pytest

 from llama_cloud_services.extract import LlamaExtract, ExtractionAgent
+from llama_cloud_services.utils import SourceText
 from collections import namedtuple
 import json
 import uuid
 from llama_cloud.types import ExtractConfig, ExtractMode
 from deepdiff import DeepDiff
 from tests.extract.util import json_subset_match_score, load_test_dotenv
-from .conftest import register_agent_for_cleanup
+from .conftest import create_agent_with_retry

 load_test_dotenv()

@@ -56,10 +58,16 @@ def get_test_cases():
            input_files.append(file_path)

        settings = [
-            ExtractConfig(extraction_mode=ExtractMode.FAST),
-            ExtractConfig(extraction_mode=ExtractMode.BALANCED),
-            ExtractConfig(extraction_mode=ExtractMode.MULTIMODAL),
-            ExtractConfig(extraction_mode=ExtractMode.PREMIUM),
+            ExtractConfig(extraction_mode=ExtractMode.FAST, invalidate_cache=True),
+            ExtractConfig(extraction_mode=ExtractMode.BALANCED, invalidate_cache=True),
+            ExtractConfig(
+                extraction_mode=ExtractMode.MULTIMODAL, invalidate_cache=True
+            ),
+            ExtractConfig(
+                extraction_mode=ExtractMode.PREMIUM,
+                invalidate_cache=True,
+                parse_model="anthropic-sonnet-4.5",
+            ),
        ]

        for input_file in sorted(input_files):
@@ -101,30 +109,24 @@ def extractor():
@pytest.fixture
 def extraction_agent(test_case: ExtractionTestCase, extractor: LlamaExtract):
    """Fixture to create and cleanup extraction agent for each test."""
-    # Create unique name with random UUID (important for CI to avoid conflicts)
    unique_id = uuid.uuid4().hex[:8]
    agent_name = f"{test_case.name}_{unique_id}"

    with open(test_case.schema_path, "r") as f:
        schema = json.load(f)

-    # Clean up any existing agents with this name
-    try:
-        agents = extractor.list_agents()
-        for agent in agents:
-            if agent.name == agent_name:
-                extractor.delete_agent(agent.id)
-    except Exception as e:
-        print(f"Warning: Failed to cleanup existing agent: {str(e)}")
-
-    # Create new agent
-    agent = extractor.create_agent(agent_name, schema, config=test_case.config)
-
-    # Register agent for cleanup at the end of the test session
-    register_agent_for_cleanup(agent.id)
+    agent = create_agent_with_retry(
+        extractor, name=agent_name, data_schema=schema, config=test_case.config
+    )

    yield agent

+    # Inline cleanup -- each worker cleans up its own agents
+    try:
+        extractor.delete_agent(agent.id)
+    except Exception as e:
+        print(f"Warning: Failed to cleanup agent {agent.id}: {e}")
+

@pytest.mark.skipif(
    os.environ.get("LLAMA_CLOUD_API_KEY", "") == "",
@@ -134,7 +136,12 @@ def extraction_agent(test_case: ExtractionTestCase, extractor: LlamaExtract):
 def test_extraction(
    test_case: ExtractionTestCase, extraction_agent: ExtractionAgent
 ) -> None:
-    result = extraction_agent.extract(test_case.input_file).data  # type: ignore
+    # Use a unique external_file_id per upload to avoid cross-test collisions.
+    input_path = Path(test_case.input_file)
+    unique_filename = f"{input_path.stem}-{uuid.uuid4().hex}{input_path.suffix}"
+    result = extraction_agent.extract(
+        SourceText(file=str(input_path), filename=unique_filename)
+    ).data  # type: ignore
    with open(test_case.expected_output, "r") as f:
        expected = json.load(f)
    # TODO: fix the saas_slide test
@@ -8,7 +8,6 @@ from llama_cloud import (
    AutoTransformConfig,
    PipelineCreate,
    PipelineFileCreate,
-    ProjectCreate,
    CompositeRetrievalMode,
    LlamaParseParameters,
    ReRankConfig,
@@ -60,11 +59,15 @@ def local_figures_file() -> str:
 def _setup_index_with_file(
    client: LlamaCloud, index_name: str, remote_file: Tuple[str, str]
 ) -> LlamaCloudIndex:
-    # create project if it doesn't exist
-    project_create = ProjectCreate(name=project_name)
-    project = client.projects.upsert_project(
-        organization_id=organization_id, request=project_create
+    # get project by name
+    projects = client.projects.list_projects(
+        organization_id=organization_id, project_name=project_name
    )
+    if not projects:
+        raise ValueError(
+            f"Project '{project_name}' not found. Please create it first in the LlamaCloud UI."
+        )
+    project = projects[0]

    # create pipeline
    pipeline_create = PipelineCreate(
@@ -78,7 +78,7 @@ async def test_upload_bytes(
    uploaded_file = await file_client.upload_bytes(file_bytes, external_file_id)

    assert isinstance(uploaded_file, File)
-    expected_name = external_file_id if use_presigned_url else "upload"
+    expected_name = external_file_id
    assert uploaded_file.name == expected_name
    assert uploaded_file.external_file_id == external_file_id

@@ -100,7 +100,7 @@ async def test_upload_buffer(
    uploaded_file = await file_client.upload_buffer(buffer, external_file_id, file_size)

    assert isinstance(uploaded_file, File)
-    expected_name = external_file_id if use_presigned_url else "upload"
+    expected_name = external_file_id
    assert uploaded_file.name == expected_name
    assert uploaded_file.external_file_id == external_file_id

@@ -11,10 +11,12 @@ from llama_cloud.types.aggregate_group import AggregateGroup
 from pydantic import BaseModel, Field, ValidationError

 from llama_cloud_services.beta.agent_data.schema import (
+    BoundingBox,
    ExtractedData,
    ExtractedFieldMetadata,
    FieldCitation,
    InvalidExtractionData,
+    PageDimensions,
    TypedAgentData,
    TypedAggregateGroup,
    calculate_overall_confidence,
@@ -421,6 +423,7 @@ def create_extract_run(
    },
    data_schema: Dict[str, Any] = {},
    file: File = create_file(),
+    error: Optional[str] = None,
 ) -> ExtractRun:
    return ExtractRun.parse_obj(
        {
@@ -437,6 +440,7 @@ def create_extract_run(
            "status": "SUCCESS",
            "project_id": str(uuid.uuid4()),
            "from_ui": False,
+            "error": error,
        }
    )

@@ -542,6 +546,46 @@ def test_extracted_data_from_extraction_result_invalid_data():
    assert invalid_data.field_metadata["name"].confidence == 0.9
    assert invalid_data.overall_confidence == 0.9

+    # Verify default error message when no job error present
+    assert exc_info.value.extraction_error is None
+    assert "Not able to parse the extracted data" in str(exc_info.value)
+
+
+def test_extracted_data_from_extraction_result_with_job_error():
+    """Test ExtractedData.from_extraction_result with job-level error prominently displayed."""
+    job_error_message = "Failed to process document: unsupported file format"
+
+    # Create ExtractRun with both invalid data AND a job-level error
+    extract_run = create_extract_run(
+        data={
+            "missing_name": "Valid Name",
+            "age": "not_a_number",
+        },  # Invalid age, missing name
+        extraction_metadata={
+            "name": {"confidence": 0.9},
+        },
+        data_schema={},
+        file=create_file(id="error-file", name="bad_data.pdf"),
+        error=job_error_message,
+    )
+
+    # Should raise InvalidExtractionData with the job error prominently displayed
+    with pytest.raises(InvalidExtractionData) as exc_info:
+        ExtractedData.from_extraction_result(
+            extract_run, Person, metadata={"test": "metadata"}
+        )
+
+    # Verify the exception message prominently shows the job error
+    exception = exc_info.value
+    assert exception.extraction_error == job_error_message
+    assert f"Extraction error: {job_error_message}" == str(exception)
+
+    # Verify the invalid_item contains both errors in metadata
+    invalid_data = exception.invalid_item
+    assert invalid_data.metadata.get("job_error") == job_error_message
+    assert "extraction_error" in invalid_data.metadata  # Validation error still present
+    assert "test" in invalid_data.metadata  # Original metadata preserved
+

 class Dimensions(BaseModel):
    length: Optional[str] = Field(
@@ -663,3 +707,69 @@ def test_field_conflict_in_schema():
    assert isinstance(
        extracted["majority_opinion"]["reasoning"], ExtractedFieldMetadata
    )
+
+
+def test_parse_extracted_field_metadata_with_bounding_boxes():
+    """Test parse_extracted_field_metadata with bounding boxes and page dimensions."""
+    raw_metadata = {
+        "document_type": {
+            "citation": [
+                {
+                    "page": 1,
+                    "matching_text": "FACTURE ORIGINALE",
+                    "bounding_boxes": [{"x": 77.28, "y": 615.12, "w": 70.6, "h": 7.2}],
+                    "page_dimensions": {"width": 222.24, "height": 736.56},
+                }
+            ],
+            "parsing_confidence": 1.0,
+            "extraction_confidence": 0.7252506422636493,
+            "confidence": 0.7252506422636493,
+        },
+        "summary": {
+            "citation": [
+                {
+                    "page": 1,
+                    "matching_text": "FACTURE ORIGINALE",
+                    "bounding_boxes": [{"x": 77.28, "y": 615.12, "w": 70.6, "h": 7.2}],
+                    "page_dimensions": {"width": 222.24, "height": 736.56},
+                },
+                {
+                    "page": 1,
+                    "matching_text": "Café filtre assiette — $1.90",
+                    "bounding_boxes": [
+                        {"x": 10.56, "y": 172.83, "w": 171.85, "h": 497.01}
+                    ],
+                    "page_dimensions": {"width": 222.24, "height": 736.56},
+                },
+            ],
+            "parsing_confidence": 1.0,
+            "extraction_confidence": 0.5700013128334419,
+            "confidence": 0.5700013128334419,
+        },
+    }
+
+    result = parse_extracted_field_metadata(raw_metadata)
+
+    # Verify document_type citation with bounding boxes
+    assert isinstance(result["document_type"], ExtractedFieldMetadata)
+    assert result["document_type"].parsing_confidence == 1.0
+    assert result["document_type"].extraction_confidence == 0.7252506422636493
+    assert result["document_type"].confidence == 0.7252506422636493
+    assert len(result["document_type"].citation) == 1
+
+    citation = result["document_type"].citation[0]
+    assert citation.page == 1
+    assert citation.matching_text == "FACTURE ORIGINALE"
+    assert len(citation.bounding_boxes) == 1
+    assert citation.bounding_boxes[0] == BoundingBox(x=77.28, y=615.12, w=70.6, h=7.2)
+    assert citation.page_dimensions == PageDimensions(width=222.24, height=736.56)
+
+    # Verify summary field with multiple citations, one bounding box each
+    assert isinstance(result["summary"], ExtractedFieldMetadata)
+    assert len(result["summary"].citation) == 2
+    assert result["summary"].citation[0].bounding_boxes[0].x == 77.28
+    assert result["summary"].citation[1].bounding_boxes[0].x == 10.56
+
+    # Verify round-trip serialization
+    result2 = parse_extracted_field_metadata(result)
+    assert result2 == result
@@ -34,9 +34,10 @@ TEST_PIPELINE = Pipeline(
 def mock_client() -> MagicMock:
    """Mock client with sensible defaults."""
    client = MagicMock()
-    client.projects.upsert_project.return_value = Project(
+    default_project = Project(
        id="default-proj", name=DEFAULT_PROJECT_NAME, organization_id="default-org"
    )
+    client.projects.list_projects.return_value = [default_project]
    client.pipelines.upsert_pipeline.return_value = Pipeline(
        id="default-pipe",
        name="default",
@@ -100,8 +101,8 @@ def test_from_documents_uses_provided_project_id(mock_client: MagicMock) -> None
            project_id=provided_project_id,
        )

-    # Assert - project upsert not called; pipeline uses provided project_id
-    mock_client.projects.upsert_project.assert_not_called()
+    # Assert - project list not called (project_id provided); pipeline uses provided project_id
+    mock_client.projects.list_projects.assert_not_called()
    assert mock_client.pipelines.upsert_pipeline.call_count == 1
    assert (
        mock_client.pipelines.upsert_pipeline.call_args.kwargs["project_id"]
@@ -110,29 +111,29 @@ def test_from_documents_uses_provided_project_id(mock_client: MagicMock) -> None
    assert index.project.id == provided_project_id


-def test_from_documents_upserts_project_when_project_id_missing(
+def test_from_documents_lists_project_when_project_id_missing(
    mock_client: MagicMock,
 ) -> None:
    organization_id = "org-xyz"
    index_name = "my_new_index"

-    # Project is created when project_id is not provided
-    upserted_project = Project(
+    # Project is found by name when project_id is not provided
+    found_project = Project(
        id="proj-999", name=DEFAULT_PROJECT_NAME, organization_id=organization_id
    )
-    mock_client.projects.upsert_project.return_value = upserted_project
+    mock_client.projects.list_projects.return_value = [found_project]

    test_pipeline = Pipeline(
        id="pipe-xyz",
        name=index_name,
-        project_id=upserted_project.id,
+        project_id=found_project.id,
        embedding_config=EMBEDDING_CONFIG,
    )

    with patch.object(
        base,
        "resolve_project_and_pipeline",
-        return_value=(upserted_project, test_pipeline),
+        return_value=(found_project, test_pipeline),
    ):
        docs = [Document(text="world")]
        index = LlamaCloudIndex.from_documents(
@@ -141,15 +142,15 @@ def test_from_documents_upserts_project_when_project_id_missing(
            organization_id=organization_id,
        )

-    # Assert - project was upserted with org id and default project name
-    mock_client.projects.upsert_project.assert_called_once()
-    kwargs = mock_client.projects.upsert_project.call_args.kwargs
+    # Assert - project was listed with org id and default project name
+    mock_client.projects.list_projects.assert_called_once()
+    kwargs = mock_client.projects.list_projects.call_args.kwargs
    assert kwargs["organization_id"] == organization_id
-    assert kwargs["request"].name == DEFAULT_PROJECT_NAME
+    assert kwargs["project_name"] == DEFAULT_PROJECT_NAME

-    # Pipeline created under the upserted project id
+    # Pipeline created under the found project id
    assert (
        mock_client.pipelines.upsert_pipeline.call_args.kwargs["project_id"]
-        == upserted_project.id
+        == found_project.id
    )
-    assert index.project.id == upserted_project.id
+    assert index.project.id == found_project.id
@@ -3,7 +3,8 @@ revision = 3
 requires-python = ">=3.9, <4.0"
 resolution-markers = [
    "python_full_version >= '3.14'",
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
    "python_full_version == '3.10.*'",
    "python_full_version < '3.10'",
 ]
@@ -220,7 +221,8 @@ name = "argon2-cffi-bindings"
 version = "25.1.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
    "python_full_version == '3.10.*'",
    "python_full_version < '3.10'",
 ]
@@ -589,7 +591,8 @@ version = "8.2.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
    "python_full_version == '3.10.*'",
 ]
 dependencies = [
@@ -720,6 +723,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
 ]

+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
+]
+
 [[package]]
 name = "eval-type-backport"
 version = "0.2.2"
@@ -1120,7 +1132,8 @@ version = "8.37.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
    "python_full_version == '3.10.*'",
 ]
 dependencies = [
@@ -1582,21 +1595,21 @@ wheels = [

 [[package]]
 name = "llama-cloud"
-version = "0.1.44"
+version = "0.1.46"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "certifi" },
    { name = "httpx" },
    { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/54/eb/16e31fb0fc4df91b08fa19cc3f28ac6e3c7d4df0bcbb71dd2bf596e9586f/llama_cloud-0.1.44.tar.gz", hash = "sha256:276a2b4f94463da037431ca3063331b3b6be398bbfb003113ee76b7c2a873b53", size = 120502, upload-time = "2025-11-04T00:51:58.578Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/40/f3/f4d6520f8d546e6c5a02f6ebeed5c09774a074b8d2c24ad559ace97a56a6/llama_cloud-0.1.46.tar.gz", hash = "sha256:e86f8791c053590d70cc59e0fc13ce72f9b681a8e658bc61df86d0285288d8ee", size = 127752, upload-time = "2026-01-21T18:40:57.103Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/69/0a/fabe54c21d5927d626550cb9560a20e51e42468355f5f0fb300f84806e28/llama_cloud-0.1.44-py3-none-any.whl", hash = "sha256:dfdcc4932353711fc8639f14261cbb54a88139b7790ebdd3ed4fde29bbbc0b88", size = 332779, upload-time = "2025-11-04T00:51:57.371Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/3a/6caaea28c8c804add33c91d356ed7d5a5412d6c9598e1450af95a15e0bcd/llama_cloud-0.1.46-py3-none-any.whl", hash = "sha256:6c6546c09c04a038c86d84d42f00eae8fd3bff49991ad3aab844bd866ecdf352", size = 361989, upload-time = "2026-01-21T18:40:54.863Z" },
 ]

 [[package]]
 name = "llama-cloud-services"
-version = "0.6.79"
+version = "0.6.90"
 source = { editable = "." }
 dependencies = [
    { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1620,10 +1633,15 @@ dev = [
    { name = "ipython", version = "8.37.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
    { name = "jupyter" },
    { name = "mypy" },
+    { name = "openpyxl" },
+    { name = "pandas" },
    { name = "pre-commit" },
+    { name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
    { name = "pydantic-settings" },
    { name = "pytest" },
    { name = "pytest-asyncio" },
+    { name = "pytest-timeout" },
    { name = "pytest-xdist" },
 ]

@@ -1631,7 +1649,7 @@ dev = [
 requires-dist = [
    { name = "click", specifier = ">=8.1.7,<9" },
    { name = "eval-type-backport", marker = "python_full_version < '3.10'", specifier = ">=0.2.0,<0.3" },
-    { name = "llama-cloud", specifier = "==0.1.44" },
+    { name = "llama-cloud", specifier = "==0.1.46" },
    { name = "llama-index-core", specifier = ">=0.12.0" },
    { name = "packaging", specifier = ">=23.0" },
    { name = "platformdirs", specifier = ">=4.3.7,<5" },
@@ -1648,10 +1666,14 @@ dev = [
    { name = "ipython", specifier = ">=8.12.3,<9" },
    { name = "jupyter", specifier = ">=1.1.1,<2" },
    { name = "mypy", specifier = ">=1.14.1,<2" },
+    { name = "openpyxl" },
+    { name = "pandas" },
    { name = "pre-commit", specifier = "==3.2.0" },
+    { name = "pyarrow" },
    { name = "pydantic-settings", specifier = ">=2.10.1" },
    { name = "pytest", specifier = ">=8.0.0,<9" },
    { name = "pytest-asyncio" },
+    { name = "pytest-timeout", specifier = ">=2.3.1" },
    { name = "pytest-xdist", specifier = ">=3.6.1,<4" },
 ]

@@ -2098,7 +2120,8 @@ version = "3.5"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" }
 wheels = [
@@ -2284,7 +2307,8 @@ version = "2.3.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.14'",
-    "python_full_version >= '3.11' and python_full_version < '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/37/7d/3fec4199c5ffb892bed55cff901e4f39a58c81df9c44c280499e92cad264/numpy-2.3.2.tar.gz", hash = "sha256:e0486a11ec30cdecb53f184d496d1c6a20786c81e55e41640270130056f8ee48", size = 20489306, upload-time = "2025-07-24T21:32:07.553Z" }
 wheels = [
@@ -2363,6 +2387,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/78/e3/6690b3f85a05506733c7e90b577e4762517404ea78bab2ca3a5cb1aeb78d/numpy-2.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6936aff90dda378c09bea075af0d9c675fe3a977a9d2402f95a87f440f59f619", size = 12977811, upload-time = "2025-07-24T21:29:18.234Z" },
 ]

+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" },
+]
+
 [[package]]
 name = "orderly-set"
 version = "5.5.0"
@@ -2390,6 +2426,76 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
 ]

+[[package]]
+name = "pandas"
+version = "2.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "python-dateutil" },
+    { name = "pytz" },
+    { name = "tzdata" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763, upload-time = "2025-09-29T23:16:53.287Z" },
+    { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217, upload-time = "2025-09-29T23:17:04.522Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791, upload-time = "2025-09-29T23:17:18.444Z" },
+    { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373, upload-time = "2025-09-29T23:17:35.846Z" },
+    { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444, upload-time = "2025-09-29T23:17:49.341Z" },
+    { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459, upload-time = "2025-09-29T23:18:03.722Z" },
+    { url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086, upload-time = "2025-09-29T23:18:18.505Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" },
+    { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" },
+    { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = "2025-09-29T23:19:59.765Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload-time = "2025-09-29T23:20:14.098Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload-time = "2025-09-29T23:20:26.76Z" },
+    { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload-time = "2025-09-29T23:20:41.344Z" },
+    { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload-time = "2025-09-29T23:21:05.024Z" },
+    { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload-time = "2025-09-29T23:21:15.979Z" },
+    { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload-time = "2025-09-29T23:21:27.165Z" },
+    { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload-time = "2025-09-29T23:21:40.532Z" },
+    { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload-time = "2025-09-29T23:21:55.77Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload-time = "2025-09-29T23:22:10.109Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload-time = "2025-09-29T23:25:04.889Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload-time = "2025-09-29T23:22:24.343Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload-time = "2025-09-29T23:22:37.762Z" },
+    { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload-time = "2025-09-29T23:22:51.688Z" },
+    { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload-time = "2025-09-29T23:23:05.042Z" },
+    { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload-time = "2025-09-29T23:23:28.57Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload-time = "2025-09-29T23:24:24.876Z" },
+    { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635, upload-time = "2025-09-29T23:25:52.486Z" },
+    { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079, upload-time = "2025-09-29T23:26:33.204Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049, upload-time = "2025-09-29T23:27:15.384Z" },
+    { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638, upload-time = "2025-09-29T23:27:51.625Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834, upload-time = "2025-09-29T23:28:21.289Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925, upload-time = "2025-09-29T23:28:58.261Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071, upload-time = "2025-09-29T23:32:27.484Z" },
+    { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504, upload-time = "2025-09-29T23:29:31.47Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702, upload-time = "2025-09-29T23:29:54.591Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535, upload-time = "2025-09-29T23:30:21.003Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582, upload-time = "2025-09-29T23:30:43.391Z" },
+    { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963, upload-time = "2025-09-29T23:31:10.009Z" },
+    { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
+    { url = "https://files.pythonhosted.org/packages/56/b4/52eeb530a99e2a4c55ffcd352772b599ed4473a0f892d127f4147cf0f88e/pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2", size = 11567720, upload-time = "2025-09-29T23:33:06.209Z" },
+    { url = "https://files.pythonhosted.org/packages/48/4a/2d8b67632a021bced649ba940455ed441ca854e57d6e7658a6024587b083/pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8", size = 10810302, upload-time = "2025-09-29T23:33:35.846Z" },
+    { url = "https://files.pythonhosted.org/packages/13/e6/d2465010ee0569a245c975dc6967b801887068bc893e908239b1f4b6c1ac/pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff", size = 12154874, upload-time = "2025-09-29T23:33:49.939Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/18/aae8c0aa69a386a3255940e9317f793808ea79d0a525a97a903366bb2569/pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29", size = 12790141, upload-time = "2025-09-29T23:34:05.655Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/26/617f98de789de00c2a444fbe6301bb19e66556ac78cff933d2c98f62f2b4/pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73", size = 13208697, upload-time = "2025-09-29T23:34:21.835Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/fb/25709afa4552042bd0e15717c75e9b4a2294c3dc4f7e6ea50f03c5136600/pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9", size = 13879233, upload-time = "2025-09-29T23:34:35.079Z" },
+    { url = "https://files.pythonhosted.org/packages/98/af/7be05277859a7bc399da8ba68b88c96b27b48740b6cf49688899c6eb4176/pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa", size = 11359119, upload-time = "2025-09-29T23:34:46.339Z" },
+]
+
 [[package]]
 name = "pandocfilters"
 version = "1.5.1"
@@ -2735,6 +2841,122 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" },
 ]

+[[package]]
+name = "pyarrow"
+version = "21.0.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/17/d9/110de31880016e2afc52d8580b397dbe47615defbf09ca8cf55f56c62165/pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26", size = 31196837, upload-time = "2025-07-18T00:54:34.755Z" },
+    { url = "https://files.pythonhosted.org/packages/df/5f/c1c1997613abf24fceb087e79432d24c19bc6f7259cab57c2c8e5e545fab/pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79", size = 32659470, upload-time = "2025-07-18T00:54:38.329Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/ed/b1589a777816ee33ba123ba1e4f8f02243a844fed0deec97bde9fb21a5cf/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb", size = 41055619, upload-time = "2025-07-18T00:54:42.172Z" },
+    { url = "https://files.pythonhosted.org/packages/44/28/b6672962639e85dc0ac36f71ab3a8f5f38e01b51343d7aa372a6b56fa3f3/pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51", size = 42733488, upload-time = "2025-07-18T00:54:47.132Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/cc/de02c3614874b9089c94eac093f90ca5dfa6d5afe45de3ba847fd950fdf1/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a", size = 43329159, upload-time = "2025-07-18T00:54:51.686Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/3e/99473332ac40278f196e105ce30b79ab8affab12f6194802f2593d6b0be2/pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594", size = 45050567, upload-time = "2025-07-18T00:54:56.679Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/f5/c372ef60593d713e8bfbb7e0c743501605f0ad00719146dc075faf11172b/pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634", size = 26217959, upload-time = "2025-07-18T00:55:00.482Z" },
+    { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" },
+    { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" },
+    { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" },
+    { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" },
+    { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" },
+    { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" },
+    { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" },
+    { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" },
+    { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" },
+    { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" },
+    { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/cc/ce4939f4b316457a083dc5718b3982801e8c33f921b3c98e7a93b7c7491f/pyarrow-21.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a7f6524e3747e35f80744537c78e7302cd41deee8baa668d56d55f77d9c464b3", size = 31211248, upload-time = "2025-07-18T00:56:59.7Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/c2/7a860931420d73985e2f340f06516b21740c15b28d24a0e99a900bb27d2b/pyarrow-21.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:203003786c9fd253ebcafa44b03c06983c9c8d06c3145e37f1b76a1f317aeae1", size = 32676896, upload-time = "2025-07-18T00:57:03.884Z" },
+    { url = "https://files.pythonhosted.org/packages/68/a8/197f989b9a75e59b4ca0db6a13c56f19a0ad8a298c68da9cc28145e0bb97/pyarrow-21.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b4d97e297741796fead24867a8dabf86c87e4584ccc03167e4a811f50fdf74d", size = 41067862, upload-time = "2025-07-18T00:57:07.587Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/82/6ecfa89487b35aa21accb014b64e0a6b814cc860d5e3170287bf5135c7d8/pyarrow-21.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:898afce396b80fdda05e3086b4256f8677c671f7b1d27a6976fa011d3fd0a86e", size = 42747508, upload-time = "2025-07-18T00:57:13.917Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/b7/ba252f399bbf3addc731e8643c05532cf32e74cebb5e32f8f7409bc243cf/pyarrow-21.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:067c66ca29aaedae08218569a114e413b26e742171f526e828e1064fcdec13f4", size = 43345293, upload-time = "2025-07-18T00:57:19.828Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/0a/a20819795bd702b9486f536a8eeb70a6aa64046fce32071c19ec8230dbaa/pyarrow-21.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0c4e75d13eb76295a49e0ea056eb18dbd87d81450bfeb8afa19a7e5a75ae2ad7", size = 45060670, upload-time = "2025-07-18T00:57:24.477Z" },
+    { url = "https://files.pythonhosted.org/packages/10/15/6b30e77872012bbfe8265d42a01d5b3c17ef0ac0f2fae531ad91b6a6c02e/pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f", size = 26227521, upload-time = "2025-07-18T00:57:29.119Z" },
+]
+
+[[package]]
+name = "pyarrow"
+version = "22.0.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.14'",
+    "python_full_version >= '3.12' and python_full_version < '3.14'",
+    "python_full_version == '3.11.*'",
+    "python_full_version == '3.10.*'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" },
+    { url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" },
+    { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" },
+    { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" },
+    { url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" },
+    { url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" },
+    { url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" },
+    { url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" },
+    { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" },
+    { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" },
+    { url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" },
+    { url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" },
+    { url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" },
+    { url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" },
+    { url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" },
+    { url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" },
+    { url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" },
+    { url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" },
+]
+
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -2923,6 +3145,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" },
 ]

+[[package]]
+name = "pytest-timeout"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" },
+]
+
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"
@@ -2969,6 +3203,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163, upload-time = "2025-03-07T07:08:25.627Z" },
 ]

+[[package]]
+name = "pytz"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
+]
+
 [[package]]
 name = "pywin32"
 version = "311"
@@ -3860,6 +4103,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
 ]

+[[package]]
+name = "tzdata"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
+]
+
 [[package]]
 name = "uri-template"
 version = "1.3.0"
@@ -1,5 +1,50 @@
 # llama-cloud-services

+## 0.5.4
+
+### Patch Changes
+
+- 2358df1: add deprecation notices
+
+## 0.5.3
+
+### Patch Changes
+
+- d7864af: bugfixes in retry logic for LlamaExtract and LlamaClassify
+
+## 0.5.2
+
+### Patch Changes
+
+- 997bcc8: Add types for bounding boxes
+
+## 0.5.1
+
+### Patch Changes
+
+- d5b18a0: Fix publishing
+
+## 0.5.0
+
+### Minor Changes
+
+- 576c3d9: feat: support zod v4 & v3
+
+  Adds support for zod v4 while maintaining backward compatibility with v3.
+  - Updated zod peer dependency to accept both v3 and v4: `^3.25.76 || ^4.0.0`
+  - Migrated all import statements to use `zod/v4` import path for compatibility
+
+### Patch Changes
+
+- c8321d2: Improve parse results polling
+- 576c3d9: Support zod v3 and v4
+
+## 0.4.3
+
+### Patch Changes
+
+- 71db318: Add tier and version
+
 ## 0.4.2

 ### Patch Changes
@@ -1,12 +1,12 @@
 {
  "name": "llama-cloud-services",
-  "version": "0.4.2",
+  "version": "0.5.4",
  "type": "module",
  "license": "MIT",
  "scripts": {
    "get-openapi": "node ./scripts/get-openapi.js",
    "generate": "./node_modules/.bin/openapi-ts",
-    "build": "pnpm run generate && bunchee",
+    "build": "bunchee",
    "dev": "bunchee --watch",
    "lint": "eslint src/ --ignore-pattern client/*.ts --no-warn-ignored",
    "format": "prettier --write ./src/ tests/",
@@ -116,9 +116,9 @@
    "@eslint/js": "^9.32.0",
    "@hey-api/client-fetch": "^0.10.1",
    "@hey-api/openapi-ts": "^0.67.5",
-    "@llamaindex/core": "^0.6.19",
+    "@llamaindex/core": "^0.6.22",
    "@llamaindex/env": "^0.1.30",
-    "@llamaindex/workflow-core": "^0.4.1",
+    "@llamaindex/workflow-core": "^1.3.3",
    "@types/node": "^20.19.9",
    "@typescript-eslint/eslint-plugin": "^8.38.0",
    "@typescript-eslint/parser": "^8.38.0",
@@ -131,18 +131,19 @@
    "turbo": "^2.5.5",
    "typescript": "^5.8.3",
    "typescript-eslint": "^8.38.0",
-    "vitest": "^2.0.0"
+    "vitest": "^2.0.0",
+    "zod": "^4.1.13"
  },
  "peerDependencies": {
    "@llamaindex/core": "^0.6.19",
    "@llamaindex/env": "^0.1.30",
-    "@llamaindex/workflow-core": "^0.4.1"
+    "@llamaindex/workflow-core": "^1.3.3",
+    "zod": "^3.25.0 || ^4.0.0"
  },
  "dependencies": {
    "ajv": "^8.17.1",
    "file-type": "^21.0.0",
-    "p-retry": "^6.2.1",
-    "zod": "^3.25.76"
+    "p-retry": "^6.2.1"
  },
  "packageManager": "pnpm@10.8.1"
 }
@@ -1,7 +1,7 @@
 import {
  addFilesToPipelineApiApiV1PipelinesPipelineIdFilesPut,
  getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet,
-  listPipelineFilesApiV1PipelinesPipelineIdFilesGet,
+  listPipelineFiles2ApiV1PipelinesPipelineIdFiles2Get,
  listProjectsApiV1ProjectsGet,
  readFileContentApiV1FilesIdContentGet,
  searchPipelinesApiV1PipelinesGet,
@@ -97,21 +97,20 @@ export class LLamaCloudFileService {
   */
  public static async getFileUrl(pipelineId: string, filename: string) {
    initService();
-    const { data: allPipelineFiles } =
-      await listPipelineFilesApiV1PipelinesPipelineIdFilesGet({
-        path: {
-          pipeline_id: pipelineId,
-        },
-        throwOnError: true,
-      });
-    const file = allPipelineFiles.find((file) => file.name === filename);
+    const response = await listPipelineFiles2ApiV1PipelinesPipelineIdFiles2Get({
+      path: {
+        pipeline_id: pipelineId,
+      },
+      throwOnError: true,
+    });
+    const file = response.data.files.find((file) => file.name === filename);
    if (!file?.file_id) return null;
    const { data: fileContent } = await readFileContentApiV1FilesIdContentGet({
      path: {
        id: file.file_id,
      },
      query: {
-        project_id: file.project_id,
+        project_id: file.project_id || null,
      },
      throwOnError: true,
    });
@@ -2,11 +2,14 @@ export { AgentClient, createAgentDataClient } from "./client";

 export type {
  AggregateAgentDataOptions,
+  BoundingBox,
  ComparisonOperator,
  ExtractedData,
  ExtractedFieldMetadata,
  ExtractedFieldMetadataDict,
+  FieldCitation,
  FilterOperation,
+  PageDimensions,
  SearchAgentDataOptions,
  StatusType,
  TypedAgentData,
@@ -28,6 +28,44 @@ export type ComparisonOperator =
 */
 export type FilterOperation = RawFilterOperation;

+/**
+ * Bounding box coordinates for a citation location on a page
+ */
+export interface BoundingBox {
+  /** X coordinate of the bounding box origin */
+  x: number;
+  /** Y coordinate of the bounding box origin */
+  y: number;
+  /** Width of the bounding box */
+  w: number;
+  /** Height of the bounding box */
+  h: number;
+}
+
+/**
+ * Dimensions of a page in the source document
+ */
+export interface PageDimensions {
+  /** Width of the page */
+  width: number;
+  /** Height of the page */
+  height: number;
+}
+
+/**
+ * Citation information for an extracted field
+ */
+export interface FieldCitation {
+  /** The page number that the field occurred on */
+  page?: number;
+  /** The original text this field's value was derived from */
+  matching_text?: string;
+  /** Bounding boxes indicating where the citation appears on the page */
+  bounding_boxes?: BoundingBox[];
+  /** Dimensions of the page containing the citation */
+  page_dimensions?: PageDimensions;
+}
+
 /**
 * Metadata for an extracted field, including confidence and citation information
 */
@@ -38,16 +76,11 @@ export interface ExtractedFieldMetadata {
  confidence?: number;
  /** The confidence score for the field based on the extracted text only */
  extraction_confidence?: number;
+  /** The confidence score for the field based on the parsing/OCR quality */
+  parsing_confidence?: number;
  citation?: FieldCitation[];
 }

-export interface FieldCitation {
-  /** The page number that the field occurred on */
-  page?: number;
-  /** The original text this field's value was derived from */
-  matching_text?: string;
-}
-
 /**
 * Dictionary mapping field names to their metadata
 * Values can be ExtractedFieldMetadata objects, nested dictionaries, or arrays
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Logan	f385e96ab8	Delete parse.md	2026-03-24 19:27:52 -06:00
Logan	c3e4696b5f	Delete index.md	2026-03-24 19:27:41 -06:00
Logan	1e40c9cf94	Delete extract.md	2026-03-24 19:27:25 -06:00
Logan	802bc2a9f8	Add deprecation notice and clean up README Added deprecation notice and removed outdated content.	2026-03-24 19:26:59 -06:00
Neeraj Pradhan	5ea758b853	More robust extract tests with pytest xdist (#1117 )	2026-02-16 16:16:15 -08:00
dependabot[bot]	208b6f2fa5	build(deps): bump slackapi/slack-github-action from 1.27.0 to 2.1.1 (#1092 ) Bumps [slackapi/slack-github-action](https://github.com/slackapi/slack-github-action) from 1.27.0 to 2.1.1. - [Release notes](https://github.com/slackapi/slack-github-action/releases) - [Commits](https://github.com/slackapi/slack-github-action/compare/v1.27.0...v2.1.1) --- updated-dependencies: - dependency-name: slackapi/slack-github-action dependency-version: 2.1.1 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-02-14 21:03:05 -06:00
github-actions[bot]	e1b9143f79	chore: version packages (#1116 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-02-13 15:29:09 -08:00
Neeraj Pradhan	232c55bd6a	Bump up patch version (#1115 )	2026-02-13 15:20:52 -08:00
Neeraj Pradhan	ab6f2f8da5	Allows xlsx files in the sdk for extract (#1114 )	2026-02-13 14:44:25 -08:00
github-actions[bot]	66c2639ec8	chore: version packages (#1112 )	2026-02-11 15:18:43 -06:00
Logan	da1916c69f	more loudly deprecate ancient llama-parse package (#1111 )	2026-02-11 15:16:01 -06:00
Neeraj Pradhan	345e272573	Lower frequency for e2e tests (#1110 )	2026-02-11 09:07:15 -08:00
github-actions[bot]	d70fbac1ce	chore: version packages (#1103 )	2026-02-02 11:46:39 -06:00
Logan	2358df10c6	add notice (don't merge until ready) (#1065 )	2026-02-02 11:42:47 -06:00
Neeraj Pradhan	829628cc86	Use unique filenames when running dist tests (#1101 )	2026-01-30 14:00:27 -08:00
Neeraj Pradhan	42b7bbd1ae	Use sonnet when testing premium mode in extract e2e (#1098 ) * Use sonnet when testing premium mode in extract e2e * fix parse model	2026-01-27 16:16:48 -08:00
Neeraj Pradhan	38da9a52d7	Invalidate cache when running extract tests (#1097 )	2026-01-26 17:33:23 -08:00
Neeraj Pradhan	1e7ec40ee7	Fix verbose logging on slack channel (#1096 )	2026-01-26 17:12:50 -08:00
Neeraj Pradhan	dd83c1a9d0	Add retries to all extract sdk functions uniformly (#1095 )	2026-01-26 12:05:16 -08:00
Neeraj Pradhan	7cb83f5cd3	Change cron schedule for hourly extract tests (#1094 )	2026-01-26 10:15:34 -08:00
Neeraj Pradhan	b05266be6d	Try to reparse scheduled workflow (#1093 )	2026-01-26 09:56:22 -08:00
Neeraj Pradhan	eab4798165	Force github reparse of the workflow (#1090 )	2026-01-23 11:36:28 -08:00
Neeraj Pradhan	b174fa8fab	Run hourly extract tests to catch SDK schema drifts (#1089 ) * Run hourly extract tests to catch SDK schema drifts * fix url * fix prod/staging env	2026-01-22 18:18:45 -08:00
github-actions[bot]	b12ffef916	chore: version packages (#1087 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-01-21 12:44:43 -08:00
Neeraj Pradhan	07ec282257	Bump up patch version for python packages (#1086 )	2026-01-21 12:30:23 -08:00
Neeraj Pradhan	013b689812	Bump up minor version for python packages (#1085 )	2026-01-21 12:13:13 -08:00
Adrian Lyjak	3040951cb8	Use error description in invalid extraction error (#1081 ) * fix: display extraction job error in InvalidExtractionData exception Refactored InvalidExtractionData to read the `error` field from ExtractRun and prominently display it in the exception message. The job-level error is now stored in the `extraction_error` attribute and included in the invalid_item's metadata as `job_error`. * Create three-yaks-beg.md --------- Co-authored-by: Claude <noreply@anthropic.com>	2026-01-18 17:43:21 -05:00
github-actions[bot]	9239498945	chore: version packages (#1076 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-01-14 19:15:05 +01:00
Pierre-Loic Doulcet	19cbb25631	remove extension filter (#1075 ) * remove extension filter * changeset * Update ninety-goats-look.md Make it a patch version * Update package.json back out of version bump * Update pyproject.toml back out of version bump * Update package.json back out of version bump * Update pyproject.toml back out of version bump --------- Co-authored-by: Adrian Lyjak <adrianlyjak@gmail.com>	2026-01-14 19:13:39 +01:00
github-actions[bot]	812e2f7d72	chore: version packages (#1073 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-01-12 19:03:13 +01:00
Clelia (Astra) Bertelli	d7864afe3f	fix: bug fix retry logic in Classify and Extract (#1066 ) * fix: bug fix retry logic in Classify and Extract * chore: apply suggestion * chore: add PARTIAL_SUCCESS to classify	2026-01-12 18:57:40 +01:00
github-actions[bot]	ade8d027a5	chore: version packages (#1071 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-01-09 20:29:00 -05:00
Adrian Lyjak	997bcc8531	forgot ts changeset (#1070 )	2026-01-09 20:23:29 -05:00
github-actions[bot]	8be554c234	chore: version packages (#1068 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-01-09 18:56:51 -05:00
Adrian Lyjak	f777cab0c5	Add bounding box type support to TS too (#1069 ) ts too	2026-01-09 18:55:16 -05:00
Adrian Lyjak	b9b83c953d	Parse bounding boxes from extract jobs results in agent data (#1067 )	2026-01-09 18:47:57 -05:00
github-actions[bot]	3ec7024626	chore: version packages (#1058 )	2025-12-10 11:53:30 -06:00
Logan	d5b18a03fa	Remove `generate` from build path to fix publishing (#1057 )	2025-12-10 11:52:43 -06:00
Clelia (Astra) Bertelli	18dd04b6de	docs: correct links in readme (#1056 )	2025-12-10 17:08:58 +01:00
github-actions[bot]	685a5e6ccc	chore: version packages (#1054 )	2025-12-09 15:30:13 -06:00
Jim Geurts	576c3d9076	feat: support zod v4 & v3 (#1052 )	2025-12-09 15:29:23 -06:00
Logan	c8321d2bc5	improve parse ts polling (#1053 )	2025-12-09 15:21:19 -06:00
Tuana Çelik	131bbed7aa	batch parse sctript with asyncio (#1051 ) * batch parse sctript with asyncio * lint --------- Co-authored-by: Logan Markewich <logan.markewich@live.com>	2025-12-08 18:50:11 +01:00
Javier Torres	41c8ac2348	docs: Split Example Notebook (#1044 ) * split notebook * Lint	2025-12-08 13:57:20 +01:00
github-actions[bot]	32c53cdf96	chore: version packages (#1046 )	2025-12-04 20:43:29 -06:00
Logan	71db318fc2	add tier/version to api (#1045 )	2025-12-04 20:42:17 -06:00
George He	dac0f79e51	Fix sheets API client (#1032 )	2025-12-03 16:39:47 -06:00
github-actions[bot]	32487763d5	chore: version packages (#1043 )	2025-12-03 14:52:26 -06:00
Daniel Bustamante Ospina	06c3c556e6	Add new fields to `SpreadsheetParsingConfig` and update validation tests (#1042 )	2025-12-03 14:50:23 -06:00
github-actions[bot]	e5dcaa83df	chore: version packages (#1041 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-12-03 11:03:36 -08:00
Neeraj Pradhan	1b7198dc62	Bump llama cloud services and parse versions (#1040 )	2025-12-03 10:39:35 -08:00
github-actions[bot]	9cfe074206	chore: version packages (#1039 )	2025-12-02 12:16:50 -06:00
Logan	ae30990ada	line level bbox (#1038 )	2025-12-02 12:12:17 -06:00
github-actions[bot]	8f1c359abc	chore: version packages (#1037 )	2025-12-02 09:50:07 -06:00
Logan	0a110de9c7	Dummy release (#1036 )	2025-12-02 09:45:52 -06:00
github-actions[bot]	d705b16923	chore: version packages (#1035 )	2025-12-02 09:43:20 -06:00
Logan	ca781132c8	No more presigned URLs by default (#1034 )	2025-12-02 09:41:49 -06:00
Roman Isecke	7a68b0fb68	docs: add batch parse directory example notebook (#1009 ) * create notebook to parse a batch of documents * remove local dev code * tidy * don't git track the sample pdfs * update notebook to use client * add logic to fetch parse results using job id from batch item * generate example for fetching results via parse job id * fix linting * convert notebook to use httpx rather than client for now * fix linting	2025-12-01 13:57:18 -05:00