Compare commits
507 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f385e96ab8 | |||
| c3e4696b5f | |||
| 1e40c9cf94 | |||
| 802bc2a9f8 | |||
| 5ea758b853 | |||
| 208b6f2fa5 | |||
| e1b9143f79 | |||
| 232c55bd6a | |||
| ab6f2f8da5 | |||
| 66c2639ec8 | |||
| da1916c69f | |||
| 345e272573 | |||
| d70fbac1ce | |||
| 2358df10c6 | |||
| 829628cc86 | |||
| 42b7bbd1ae | |||
| 38da9a52d7 | |||
| 1e7ec40ee7 | |||
| dd83c1a9d0 | |||
| 7cb83f5cd3 | |||
| b05266be6d | |||
| eab4798165 | |||
| b174fa8fab | |||
| b12ffef916 | |||
| 07ec282257 | |||
| 013b689812 | |||
| 3040951cb8 | |||
| 9239498945 | |||
| 19cbb25631 | |||
| 812e2f7d72 | |||
| d7864afe3f | |||
| ade8d027a5 | |||
| 997bcc8531 | |||
| 8be554c234 | |||
| f777cab0c5 | |||
| b9b83c953d | |||
| 3ec7024626 | |||
| d5b18a03fa | |||
| 18dd04b6de | |||
| 685a5e6ccc | |||
| 576c3d9076 | |||
| c8321d2bc5 | |||
| 131bbed7aa | |||
| 41c8ac2348 | |||
| 32c53cdf96 | |||
| 71db318fc2 | |||
| dac0f79e51 | |||
| 32487763d5 | |||
| 06c3c556e6 | |||
| e5dcaa83df | |||
| 1b7198dc62 | |||
| 9cfe074206 | |||
| ae30990ada | |||
| 8f1c359abc | |||
| 0a110de9c7 | |||
| d705b16923 | |||
| ca781132c8 | |||
| 7a68b0fb68 | |||
| 87dec5433d | |||
| 99f4eba8d0 | |||
| 54561e2dd2 | |||
| bfaec79a8f | |||
| 3e0e522a6b | |||
| f70b6d87ec | |||
| 693b5b83b1 | |||
| ad38ef5cd7 | |||
| 4c4c6e6575 | |||
| 740b47d9dc | |||
| f3233deb2e | |||
| fd45127678 | |||
| 0506c88735 | |||
| 4bc9eb6c0d | |||
| 5a3dac655c | |||
| 519254efbe | |||
| 6ab56b79f3 | |||
| e020e3e2b1 | |||
| f293547910 | |||
| 662bc37462 | |||
| 9f1ef4ef1f | |||
| 1243573924 | |||
| 407292b177 | |||
| a7df7c0912 | |||
| c758144bfe | |||
| fee516dd19 | |||
| 032fbd5768 | |||
| 970e864514 | |||
| d0649ece6e | |||
| 5d4cabd843 | |||
| 9070a6ac16 | |||
| 4f24f537f6 | |||
| 8859a203e2 | |||
| b091364054 | |||
| 43b1a013ca | |||
| f81532e7f2 | |||
| 986d3987d3 | |||
| 1bf522311f | |||
| 24166dcfc8 | |||
| bfb7f3973f | |||
| 979f643c77 | |||
| aefd89cf1b | |||
| 8ea2b2c64e | |||
| 4a9a2a21d8 | |||
| e6a7939206 | |||
| 104a03e829 | |||
| 6e0f2f4ca0 | |||
| 0708d11f8a | |||
| be19185503 | |||
| 7571b0d6c4 | |||
| ad6734bf80 | |||
| 9ec2a8322e | |||
| 51011b9f30 | |||
| 09805f9e15 | |||
| 8ced6f6eab | |||
| 081ddeca34 | |||
| 2460908789 | |||
| c226d6a54c | |||
| 5d4c682eb2 | |||
| f72d3535c8 | |||
| 1ea09a366e | |||
| d4bbeb6389 | |||
| d028397603 | |||
| 35ea8476db | |||
| 3e5f7c4f1e | |||
| 9d9b816644 | |||
| 83555f76e6 | |||
| 5edf5f914a | |||
| 22e4975cb2 | |||
| bc2f04379b | |||
| f9f951d5d8 | |||
| 355129fea5 | |||
| d9aed80ded | |||
| c07d2d70a8 | |||
| ed6937a5a9 | |||
| 34c15932a3 | |||
| b18ea96d11 | |||
| 196ab827f5 | |||
| ba4cb4d5e9 | |||
| 58d883b825 | |||
| 5fc5ebfc6c | |||
| fe3e20fd53 | |||
| e7e59459ab | |||
| f4d7c84e19 | |||
| 9050a346e4 | |||
| 9690ccf4ea | |||
| 97745f0f1c | |||
| 61a696b9db | |||
| 3e01adaf0e | |||
| 37393b7e98 | |||
| ecd859a67c | |||
| decca8e671 | |||
| 5ea0815187 | |||
| cf149650f5 | |||
| 4c6c231ea4 | |||
| 5955b26509 | |||
| 31f54bca55 | |||
| b1ae7bb736 | |||
| 31fe12e0da | |||
| 90b0c5e295 | |||
| 79fe1930cf | |||
| ab225c3eab | |||
| 6f1de75909 | |||
| 230ed64e41 | |||
| ef126c3a93 | |||
| 51a7534733 | |||
| 4f5d2bde13 | |||
| 3d05fe5d77 | |||
| c16ca673af | |||
| 6619034bce | |||
| c56fb5d8f7 | |||
| b407a5edb5 | |||
| e6a27d17fb | |||
| 34077fd479 | |||
| 7a68ad5a7f | |||
| 74a1b6c2f2 | |||
| 9a90ae5264 | |||
| 310c1bc105 | |||
| cd20b29299 | |||
| 0cb7aeb81c | |||
| 98db5eeeae | |||
| c21cb34ff6 | |||
| e28c7b9d92 | |||
| ee4e565604 | |||
| 6dbb089f4c | |||
| c4b694db8d | |||
| 97f428ad06 | |||
| ef92ee5408 | |||
| d094668d03 | |||
| 5bb5fc1625 | |||
| 1d57e0071d | |||
| 2a344c4f5c | |||
| ce02559b8d | |||
| e42746e372 | |||
| 3149dfd03a | |||
| e499fdbdab | |||
| e57df39248 | |||
| 09b192b98b | |||
| 13f01a0621 | |||
| cf879a1a58 | |||
| fcdf2ab63e | |||
| 083d8109c2 | |||
| 89cfc8b25f | |||
| c46e157f92 | |||
| 05d6026d37 | |||
| 8e98d5c146 | |||
| 3f311c0669 | |||
| b1a2f9d42b | |||
| 142f55c94c | |||
| 230a110e52 | |||
| 83e2b031cd | |||
| 4844e26e5c | |||
| 70a049af3c | |||
| dc11776c86 | |||
| 2448a42b90 | |||
| c75a900174 | |||
| 2fb7adfe0e | |||
| dc82270724 | |||
| d880a48dd0 | |||
| 7567e8b45e | |||
| 0d59a90151 | |||
| 98ad550b1a | |||
| b58f43ce9f | |||
| acf6adcd91 | |||
| daf6576c3c | |||
| 8caa4defa6 | |||
| 26918b8de4 | |||
| 6fb5ebe2f9 | |||
| c0aa67995b | |||
| 9f841f8328 | |||
| 99c75eece9 | |||
| 57d2586ee3 | |||
| 4280a43ec8 | |||
| 7f1082bbb2 | |||
| 57cfc45804 | |||
| 30e8913875 | |||
| 0ce6d4d7a4 | |||
| 584ba8d48e | |||
| 925805ee11 | |||
| 76fb73c971 | |||
| 6d19ea9ac0 | |||
| 90431090e9 | |||
| 6dff35b204 | |||
| e634c7978d | |||
| 7a9e99bba2 | |||
| efcdd4405b | |||
| bf3614690f | |||
| 7463e00da3 | |||
| cbe9de0c57 | |||
| a023507d42 | |||
| e48f544ddc | |||
| 4aa7ad5642 | |||
| c39cdbcd01 | |||
| 71eaa8bcc6 | |||
| 1e1cbdfc79 | |||
| cc8af4a43a | |||
| 43fbd48ab8 | |||
| 5ec66e9452 | |||
| 211521c82e | |||
| 4ddaab1efb | |||
| 53e5ce2e83 | |||
| 9f4bd1cb64 | |||
| 456863752b | |||
| c2dc34bbd6 | |||
| fcabb04baf | |||
| 8e7c32d3d6 | |||
| 7e3013d914 | |||
| 4a664c33d2 | |||
| 6d049ee2e4 | |||
| fa73e73664 | |||
| bf67ee6056 | |||
| a1abef2ee9 | |||
| a753e01d3c | |||
| 9b15065b24 | |||
| 6e4150537c | |||
| 233d715a14 | |||
| 77ac385dfe | |||
| 53b78fcd7d | |||
| 16f81bd7ee | |||
| 0ee049fd11 | |||
| 7dba17e5bc | |||
| eeb678b937 | |||
| fe4eb664fd | |||
| 257720e443 | |||
| e7afaedf3e | |||
| b66b47a708 | |||
| fe485ff62e | |||
| 1ebe1cee67 | |||
| e9252eb48a | |||
| dad7728135 | |||
| c5111e3335 | |||
| bbbdb98362 | |||
| 60cdc2af84 | |||
| 344c20f331 | |||
| 2b0496e947 | |||
| 6c63dba6fb | |||
| 734c021a2e | |||
| eeb034896f | |||
| 4c977e8384 | |||
| c6137713c7 | |||
| fd4b1893f1 | |||
| e542e6136b | |||
| 393451e304 | |||
| 5084ba27ab | |||
| c82771f841 | |||
| dc6860535a | |||
| c872617b4e | |||
| 47c8682761 | |||
| 683400788b | |||
| 05065a8329 | |||
| 1ae4d2bbc7 | |||
| ae38f406fd | |||
| 4897d01cb0 | |||
| bd7b563463 | |||
| 530241dd0b | |||
| 6338641107 | |||
| 6d62fb89c3 | |||
| 7d4df3b6e5 | |||
| bc28db5b92 | |||
| f78186c0f7 | |||
| e3292f5566 | |||
| 58f980f411 | |||
| 4740d0611d | |||
| 3651a10e80 | |||
| 483b51c51c | |||
| cdbddef86d | |||
| 3690109abf | |||
| 2e322b4fc8 | |||
| 735e5f3ddc | |||
| e4cb4c75e5 | |||
| 1693deff72 | |||
| 3270f1228d | |||
| eeabf48d29 | |||
| 89348aa8e5 | |||
| 3ab2ce27b5 | |||
| 265261862f | |||
| 66cf052b8c | |||
| 2ca2d81e58 | |||
| 951ba4dfd8 | |||
| 386d210e8b | |||
| 9321602845 | |||
| 26c06353f0 | |||
| 62cf12d6eb | |||
| 253ee61463 | |||
| 2ccd2a9397 | |||
| c139e8e3e6 | |||
| 6e6e96c422 | |||
| b677e5226d | |||
| df723584b6 | |||
| efe06ffff0 | |||
| 6ba052d58f | |||
| 8cf52058b5 | |||
| 1bae09126c | |||
| bbbae9de9d | |||
| 7cb6d06316 | |||
| bca5492829 | |||
| f6a4d8681f | |||
| fd3836ec95 | |||
| f304c2dc08 | |||
| f13a1a2fc3 | |||
| df1453e30c | |||
| bac204f800 | |||
| dbf24a7daa | |||
| 6a29b1ac96 | |||
| 6c700d9e0f | |||
| ab69e87c2a | |||
| d1f97531dc | |||
| 17ebbca6ea | |||
| 08ddaaaa2f | |||
| 7515fe5f3e | |||
| cd49dae7ed | |||
| 2977f56061 | |||
| 8938286862 | |||
| 7b90d03f28 | |||
| 9dfe4d6d79 | |||
| 3a781a453e | |||
| e23487b1d8 | |||
| ccee75721b | |||
| 0a4147116c | |||
| e58b40b34c | |||
| 0db05b9b96 | |||
| a8a191ae87 | |||
| 4d92775aa8 | |||
| 477847111e | |||
| efbcfb1d2e | |||
| a9b01c761c | |||
| dac2f7c84e | |||
| 478142e509 | |||
| e76d5ba679 | |||
| 23dc9c0f68 | |||
| 8b96176d8a | |||
| 1bbf5f4823 | |||
| 2f57682035 | |||
| 4c05160f98 | |||
| 3e9ad64a7f | |||
| 21fa19c73b | |||
| 58257d546b | |||
| 5e99d810fd | |||
| a2dc717d85 | |||
| 87062e6ca8 | |||
| ae3a21c5ff | |||
| ccebb8a2fa | |||
| 2d21d6e688 | |||
| 270d96b7e3 | |||
| ea21daa96f | |||
| 42845f8d07 | |||
| 8b63ae9c46 | |||
| 173060dc50 | |||
| d19b35cd48 | |||
| 0c83fbd679 | |||
| 6ae9c1d9cb | |||
| 27523b657a | |||
| 56d73c1a3f | |||
| 0d2ad9faab | |||
| 4572f00657 | |||
| 9ed208131f | |||
| 91b03b2ea7 | |||
| 70b5dc3a63 | |||
| d6ab0aa232 | |||
| f679e1c76b | |||
| b91f86ba3d | |||
| 0f2302fda4 | |||
| ff729c05af | |||
| 76a6821fb8 | |||
| 97c7a38a69 | |||
| 5d398a8a64 | |||
| 4252f6186b | |||
| 22148ade9f | |||
| b8332fe8e1 | |||
| e40e92a133 | |||
| ba8f345f80 | |||
| 2ddbf1ba0d | |||
| 23567c8f98 | |||
| 8d39ae7763 | |||
| a2edc41fc7 | |||
| 591b6fc44d | |||
| f8a3d92ce0 | |||
| a1d18d83da | |||
| 1ad881e9fc | |||
| 2de26be464 | |||
| 36f09543b2 | |||
| 393acf8557 | |||
| ab27f2ab79 | |||
| d13b5ea30a | |||
| 81843b9285 | |||
| b19f85234b | |||
| 9dee30a616 | |||
| 4489eb1291 | |||
| dde72e3800 | |||
| e49fca4b51 | |||
| 0411b08c07 | |||
| 6c490ab781 | |||
| 737884d297 | |||
| 00c63b046d | |||
| d04bf78335 | |||
| 7dbe12b893 | |||
| 4caa9cbc02 | |||
| af1f61186a | |||
| 13dea6ae1a | |||
| fa9698a681 | |||
| 4bed7c7895 | |||
| a3954f3dda | |||
| b779e231bf | |||
| de12de1437 | |||
| 5274ea5277 | |||
| 398d775122 | |||
| aca18e12ef | |||
| 270c2ed0aa | |||
| 2cf960196f | |||
| 6b83edc8fd | |||
| 711822223a | |||
| 8e6872b57c | |||
| e14224b05d | |||
| c3a30898af | |||
| f3f6fb0444 | |||
| 641b6d61a7 | |||
| c0996a64a7 | |||
| a8904f39e2 | |||
| 81011f6336 | |||
| 117b193ee9 | |||
| bd724a7939 | |||
| 866cdca216 | |||
| ba321ac5b0 | |||
| 8fab0ac2ad | |||
| 563eb936e4 | |||
| 09c069de4b | |||
| 1224c3b40e | |||
| 1001a1586f | |||
| cb01f6e002 | |||
| 6ffc59d96a | |||
| 860b8e58f8 | |||
| 615699e2aa | |||
| 7c8436373e | |||
| cfe97c6777 | |||
| fdbc626789 | |||
| 1638349028 | |||
| 17058d4ddb | |||
| 2661f330e2 | |||
| 46b9b653c1 | |||
| c7ca760e4a | |||
| deb9cb23cc | |||
| 063ed8ee46 | |||
| 43b09e64da | |||
| 7bf72e21be | |||
| dd80807db3 | |||
| a84bec3afd | |||
| 0b5ad46744 | |||
| 260d080fda | |||
| 4f0c8ba1be |
@@ -0,0 +1,8 @@
|
||||
# Changesets
|
||||
|
||||
Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works
|
||||
with multi-package repos, or single-package repos to help you version and publish your code. You can
|
||||
find the full documentation for it [in our repository](https://github.com/changesets/changesets).
|
||||
|
||||
We have a quick list of common questions to get you started engaging with this project in
|
||||
[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md).
|
||||
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"$schema": "https://unpkg.com/@changesets/config@3.1.1/schema.json",
|
||||
"changelog": "@changesets/cli/changelog",
|
||||
"commit": false,
|
||||
"fixed": [],
|
||||
"linked": [],
|
||||
"access": "restricted",
|
||||
"baseBranch": "main",
|
||||
"updateInternalDependencies": "patch",
|
||||
"ignore": []
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
Write a concise description of what the bug is.
|
||||
|
||||
**Files**
|
||||
If possible, please provide the PDF file causing the issue.
|
||||
|
||||
**Job ID**
|
||||
If you have it, please provide the ID of the job you ran.
|
||||
You can find it here: https://cloud.llamaindex.ai/parse in the "History" tab.
|
||||
|
||||
**Client:**
|
||||
Please delete the options you did not test, leaving only the client(s) where you observed the bug:
|
||||
- Python Library
|
||||
- API
|
||||
- Frontend (cloud.llamaindex.ai)
|
||||
- Typescript Library
|
||||
- Notebook
|
||||
|
||||
**Additional context**
|
||||
Add any additional context about the problem here.
|
||||
What options did you use? Premium mode, multimodal, fast mode, parsing instructions, etc.
|
||||
Screenshots, code snippets, etc.
|
||||
@@ -0,0 +1,10 @@
|
||||
---
|
||||
name: Custom issue
|
||||
about: Neither a bug nor a feature request
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
# and
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
@@ -0,0 +1,53 @@
|
||||
name: Build Package - Python
|
||||
|
||||
# Build package on its own without additional pip install
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "py/**"
|
||||
pull_request:
|
||||
paths:
|
||||
- "py/**"
|
||||
env:
|
||||
UV_VERSION: "0.7.20"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
# You can use PyPy versions in python-version.
|
||||
# For example, pypy-2.7 and pypy-3.8
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ["3.9"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install
|
||||
|
||||
- name: Display Python version
|
||||
run: python --version
|
||||
|
||||
- name: Build
|
||||
working-directory: py
|
||||
run: uv build
|
||||
|
||||
- name: Test installing built package
|
||||
shell: bash
|
||||
working-directory: py
|
||||
run: |
|
||||
uv venv
|
||||
uv pip install dist/*.whl
|
||||
|
||||
- name: Test import
|
||||
working-directory: py
|
||||
run: uv run -- python -c "import llama_cloud_services"
|
||||
@@ -0,0 +1,34 @@
|
||||
name: Build Package - TypeScript
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "ts/**"
|
||||
pull_request:
|
||||
paths:
|
||||
- "ts/**"
|
||||
|
||||
jobs:
|
||||
pre_release:
|
||||
name: Pre Release
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version-file: "ts/llama_cloud_services/.nvmrc"
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ts/llama_cloud_services/
|
||||
run: pnpm install --no-frozen-lockfile
|
||||
|
||||
- name: Build
|
||||
working-directory: ts/llama_cloud_services/
|
||||
run: pnpm run build
|
||||
@@ -0,0 +1,95 @@
|
||||
name: Claude Code
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_review_comment:
|
||||
types: [created]
|
||||
issues:
|
||||
types: [opened, assigned]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
jobs:
|
||||
claude:
|
||||
if: |
|
||||
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
||||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Check repository access
|
||||
id: check-access
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Get the user who triggered the event
|
||||
case "${{ github.event_name }}" in
|
||||
"issue_comment")
|
||||
USER="${{ github.event.comment.user.login }}"
|
||||
;;
|
||||
"pull_request_review_comment")
|
||||
USER="${{ github.event.comment.user.login }}"
|
||||
;;
|
||||
"pull_request_review")
|
||||
USER="${{ github.event.review.user.login }}"
|
||||
;;
|
||||
"issues")
|
||||
USER="${{ github.event.issue.user.login }}"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Checking repository access for user: $USER"
|
||||
|
||||
# Check if user has write access to the repository
|
||||
REPO="${{ github.repository }}"
|
||||
if gh api repos/$REPO/collaborators/$USER/permission --jq '.permission' | grep -E "(admin|write)" > /dev/null 2>&1; then
|
||||
echo "User $USER has write access to the repository"
|
||||
echo "authorized=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "User $USER does not have write access to the repository"
|
||||
echo "authorized=false" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.check-access.outputs.authorized == 'true'
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code
|
||||
if: steps.check-access.outputs.authorized == 'true'
|
||||
id: claude
|
||||
uses: anthropics/claude-code-action@beta
|
||||
with:
|
||||
anthropic_api_key: ${{ secrets.ANTHROPIC_GITHUB_API_KEY }}
|
||||
|
||||
# Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4)
|
||||
# model: "claude-opus-4-20250514"
|
||||
|
||||
# Optional: Customize the trigger phrase (default: @claude)
|
||||
# trigger_phrase: "/claude"
|
||||
|
||||
# Optional: Trigger when specific user is assigned to an issue
|
||||
# assignee_trigger: "claude-bot"
|
||||
|
||||
# Optional: Allow Claude to run specific commands
|
||||
# Allow bash commands to be run, for things like running tests, linting, etc.
|
||||
allowed_tools: "Bash(rg:*),Bash(find:*),Bash(grep:*),Bash(pnpm:*),Bash(npm:*),Bash(uv:*),Bash(pip:*),Bash(pipx:*),Bash(make:*),Bash(cd:*),WebFetch"
|
||||
|
||||
# Optional: Add custom instructions for Claude to customize its behavior for your project
|
||||
# custom_instructions: |
|
||||
# Follow our coding standards
|
||||
# Ensure all new code has tests
|
||||
# Use TypeScript for new files
|
||||
|
||||
# Optional: Custom environment variables for Claude
|
||||
# claude_env: |
|
||||
# NODE_ENV: test
|
||||
@@ -0,0 +1,41 @@
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: ["main"]
|
||||
schedule:
|
||||
- cron: "30 16 * * 4"
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||
# - https://gh.io/supported-runners-and-hardware-resources
|
||||
# - https://gh.io/using-larger-runners
|
||||
# Consider using larger runners for possible analysis time improvements.
|
||||
runs-on: "ubuntu-latest"
|
||||
timeout-minutes: 360
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: python
|
||||
dependency-caching: true
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4
|
||||
with:
|
||||
category: "/language:python"
|
||||
@@ -0,0 +1,162 @@
|
||||
name: Extract E2E Tests (every 4 hours)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 */4 * * *"
|
||||
workflow_dispatch:
|
||||
# Allows manual triggering
|
||||
inputs:
|
||||
environment:
|
||||
description: "Environment to run the tests in"
|
||||
required: false
|
||||
default: staging
|
||||
type: choice
|
||||
options:
|
||||
- staging
|
||||
- production
|
||||
notify_slack:
|
||||
description: "Notify Slack"
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
workflow_call:
|
||||
|
||||
env:
|
||||
UV_VERSION: "0.7.20"
|
||||
PYTHON_VERSION: "3.12"
|
||||
SLACK_CHANNEL_ID: C078PHNTF44 # Extract channel ID
|
||||
API_E2E_LOG_PATH: ${{ github.workspace }}/extract-e2e.log
|
||||
|
||||
jobs:
|
||||
extract-e2e:
|
||||
name: "Extract E2E Tests (${{ matrix.environment }})"
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.environment }}
|
||||
cancel-in-progress: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
environment: ${{ github.event_name == 'schedule' && fromJson('["staging", "production"]') || fromJson(format('["{0}"]', github.event.inputs.environment || 'staging')) }}
|
||||
steps:
|
||||
- name: Set runtime inputs
|
||||
id: runtime
|
||||
run: |
|
||||
environment=${{ matrix.environment }}
|
||||
notify_slack=${{ github.event.inputs.notify_slack || github.event_name == 'schedule' }}
|
||||
echo "environment=${environment}" >> $GITHUB_OUTPUT
|
||||
echo "notify_slack=${notify_slack}" >> $GITHUB_OUTPUT
|
||||
|
||||
if [ "${environment}" = "production" ]; then
|
||||
echo "LLAMA_CLOUD_BASE_URL=https://api.cloud.llamaindex.ai" >> $GITHUB_ENV
|
||||
api_key_secret="${{ secrets.LLAMA_CLOUD_API_KEY }}"
|
||||
project_id_secret="${{ secrets.LLAMA_CLOUD_PROJECT_ID }}"
|
||||
else
|
||||
echo "LLAMA_CLOUD_BASE_URL=https://api.staging.llamaindex.ai" >> $GITHUB_ENV
|
||||
api_key_secret="${{ secrets.LLAMA_CLOUD_API_KEY_STAGING }}"
|
||||
project_id_secret="${{ secrets.LLAMA_CLOUD_PROJECT_ID_STAGING }}"
|
||||
fi
|
||||
|
||||
if [ -n "$api_key_secret" ]; then
|
||||
echo "LLAMA_CLOUD_API_KEY=$api_key_secret" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [ -n "$project_id_secret" ]; then
|
||||
echo "LLAMA_CLOUD_PROJECT_ID=$project_id_secret" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install ${{ env.PYTHON_VERSION }} && uv python pin ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Run Extract E2E tests
|
||||
id: extract-tests
|
||||
continue-on-error: true
|
||||
working-directory: py
|
||||
run: |
|
||||
set -o pipefail
|
||||
rm -f "$API_E2E_LOG_PATH"
|
||||
uv run pytest -v -n 8 --timeout=300 --session-timeout=1740 tests/extract/ 2>&1 | tee "$API_E2E_LOG_PATH"
|
||||
|
||||
- name: Extract pytest failure summary
|
||||
id: failed-tests
|
||||
if: steps.extract-tests.outcome == 'failure' || cancelled()
|
||||
run: |
|
||||
summary="$(python3 - <<'PY'
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
log_path = Path(os.environ["API_E2E_LOG_PATH"])
|
||||
if not log_path.exists():
|
||||
print("Test log not found.")
|
||||
raise SystemExit(0)
|
||||
|
||||
lines = log_path.read_text(errors="ignore").splitlines()
|
||||
|
||||
# Find the "short test summary info" section
|
||||
start = None
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith("=") and "short test summary info" in line:
|
||||
start = i + 1
|
||||
break
|
||||
|
||||
if start is None:
|
||||
print("No test summary found.")
|
||||
raise SystemExit(0)
|
||||
|
||||
# Extract just the FAILED/ERROR lines (test name + short reason)
|
||||
failed_tests = []
|
||||
for line in lines[start:]:
|
||||
if line.startswith("="):
|
||||
break # End of section
|
||||
if line.startswith("FAILED ") or line.startswith("ERROR "):
|
||||
# Extract test name and truncate the error message
|
||||
match = re.match(r"(FAILED|ERROR) ([\w/:.\[\]_-]+)", line)
|
||||
if match:
|
||||
failed_tests.append(f"{match.group(1)}: {match.group(2)}")
|
||||
|
||||
if failed_tests:
|
||||
print("\n".join(failed_tests[:20])) # Limit to 20 tests max
|
||||
else:
|
||||
print("No failed tests found in summary.")
|
||||
PY
|
||||
)"
|
||||
if [ -z "$summary" ]; then
|
||||
summary="Failed test summary not available. Review the full run logs."
|
||||
fi
|
||||
{
|
||||
printf 'summary<<EOF\n%s\nEOF\n' "$summary"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check test results
|
||||
if: always()
|
||||
run: |
|
||||
if [ "${{ steps.extract-tests.outcome }}" == "failure" ]; then
|
||||
echo "Extract E2E tests failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Post to Extract Slack channel
|
||||
id: slack
|
||||
if: (failure() || cancelled()) && steps.runtime.outputs.notify_slack == 'true'
|
||||
uses: slackapi/slack-github-action@v2.1.1
|
||||
with:
|
||||
channel-id: ${{ env.SLACK_CHANNEL_ID }}
|
||||
slack-message: |
|
||||
:red_circle: *Extract E2E Failed* (${{ steps.runtime.outputs.environment }})
|
||||
```
|
||||
${{ steps.failed-tests.outputs.summary }}
|
||||
```
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
@@ -0,0 +1,46 @@
|
||||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
UV_VERSION: "0.7.20"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
# You can use PyPy versions in python-version.
|
||||
# For example, pypy-2.7 and pypy-3.8
|
||||
matrix:
|
||||
python-version: ["3.9"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install ${{ matrix.python-version }}
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version-file: "ts/llama_cloud_services/.nvmrc"
|
||||
- name: Install dependencies
|
||||
run: pnpm install --no-frozen-lockfile
|
||||
|
||||
- name: Run linter
|
||||
shell: bash
|
||||
working-directory: py
|
||||
run: uv run -- pre-commit run -a
|
||||
# The JS checks normally run indirectly through lint-staged, which `pre-commit run -a` does not trigger, so run them directly.
|
||||
- run: pnpm -w --filter llama-cloud-services run lint
|
||||
- run: pnpm -w --filter llama-cloud-services run format:check
|
||||
@@ -0,0 +1,39 @@
|
||||
name: Test end-to-end - Python
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "py/**"
|
||||
|
||||
env:
|
||||
UV_VERSION: "0.7.20"
|
||||
LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
|
||||
|
||||
jobs:
|
||||
test_e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
# You can use PyPy versions in python-version.
|
||||
# For example, pypy-2.7 and pypy-3.8
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install ${{ matrix.python-version }} && uv python pin ${{ matrix.python-version }}
|
||||
|
||||
- name: Run Tests
|
||||
working-directory: py
|
||||
run: make e2e
|
||||
|
||||
- name: Remove virtual environment
|
||||
working-directory: py
|
||||
run: rm -rf .venv/
|
||||
@@ -0,0 +1,42 @@
|
||||
name: Test - Python
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "py/**"
|
||||
pull_request:
|
||||
paths:
|
||||
- "py/**"
|
||||
|
||||
env:
|
||||
UV_VERSION: "0.7.20"
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
# You can use PyPy versions in python-version.
|
||||
# For example, pypy-2.7 and pypy-3.8
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install ${{ matrix.python-version }} && uv python pin ${{ matrix.python-version }}
|
||||
|
||||
- name: Run Tests
|
||||
working-directory: py
|
||||
run: uv run pytest unit_tests/ -v
|
||||
|
||||
- name: Remove virtual environment
|
||||
working-directory: py
|
||||
run: rm -rf .venv/
|
||||
@@ -0,0 +1,39 @@
|
||||
name: Test - TypeScript
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "ts/**"
|
||||
pull_request:
|
||||
paths:
|
||||
- "ts/**"
|
||||
|
||||
env:
|
||||
TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
|
||||
TURBO_TEAM: ${{ vars.TURBO_TEAM }}
|
||||
TURBO_REMOTE_ONLY: true
|
||||
LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Test - TypeScript
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: pnpm/action-setup@v4
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version-file: "ts/llama_cloud_services/.nvmrc"
|
||||
- name: Install dependencies
|
||||
run: pnpm -r install --no-frozen-lockfile
|
||||
- name: Build package
|
||||
run: pnpm --filter llama-cloud-services build
|
||||
- name: Run Tests
|
||||
working-directory: ts/llama_cloud_services/
|
||||
run: pnpm test
|
||||
- name: Run e2e tests
|
||||
working-directory: ts/e2e-tests/
|
||||
run: pnpm test
|
||||
@@ -0,0 +1,61 @@
|
||||
name: Version Bump and Release
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
concurrency: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
jobs:
|
||||
release:
|
||||
name: Release
|
||||
runs-on: ubuntu-latest
|
||||
# Only run on main branch pushes
|
||||
if: github.ref == 'refs/heads/main'
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version: "22"
|
||||
cache: "pnpm"
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install
|
||||
|
||||
- name: Add auth token to .npmrc file
|
||||
run: |
|
||||
cat << EOF >> ".npmrc"
|
||||
//registry.npmjs.org/:_authToken=$NPM_TOKEN
|
||||
EOF
|
||||
env:
|
||||
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
||||
- name: Create Release Pull Request or Publish packages
|
||||
id: changesets
|
||||
uses: changesets/action@v1
|
||||
with:
|
||||
commit: "chore: version packages"
|
||||
title: "chore: version packages"
|
||||
# Custom version script
|
||||
version: pnpm -w run version
|
||||
# Custom publish script
|
||||
publish: pnpm -w run publish
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
||||
LLAMA_PARSE_PYPI_TOKEN: ${{ secrets.LLAMA_PARSE_PYPI_TOKEN }}
|
||||
@@ -1,3 +1,12 @@
|
||||
.git
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyc
|
||||
.DS_Store
|
||||
.idea
|
||||
.env*
|
||||
.ipynb_checkpoints*
|
||||
*_cache/
|
||||
node_modules/
|
||||
.turbo/
|
||||
dist/
|
||||
.npmrc
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
---
|
||||
default_language_version:
|
||||
python: python3
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.5.0
|
||||
hooks:
|
||||
- id: check-byte-order-marker
|
||||
- id: check-merge-conflict
|
||||
- id: check-symlinks
|
||||
- id: check-toml
|
||||
- id: check-yaml
|
||||
- id: detect-private-key
|
||||
- id: end-of-file-fixer
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
exclude: ^ts/llama_cloud_services/src/client/
|
||||
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||
rev: v0.1.5
|
||||
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-non-zero-on-fix]
|
||||
exclude: ".*uv.lock|examples/"
|
||||
- repo: https://github.com/psf/black-pre-commit-mirror
|
||||
rev: 23.10.1
|
||||
hooks:
|
||||
- id: black-jupyter
|
||||
name: black-src
|
||||
alias: black
|
||||
exclude: ".*uv.lock|examples/extract/solar_panel_e2e_comparison.ipynb"
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.0.1
|
||||
hooks:
|
||||
- id: mypy
|
||||
exclude: ^py/tests|^py/unit_tests|^examples
|
||||
additional_dependencies:
|
||||
[
|
||||
"types-requests",
|
||||
"types-Deprecated",
|
||||
"types-redis",
|
||||
"types-setuptools",
|
||||
"types-PyYAML",
|
||||
"types-protobuf==4.24.0.4",
|
||||
]
|
||||
args:
|
||||
[
|
||||
--disallow-untyped-defs,
|
||||
--ignore-missing-imports,
|
||||
--python-version=3.10,
|
||||
]
|
||||
- repo: https://github.com/adamchainz/blacken-docs
|
||||
rev: 1.16.0
|
||||
hooks:
|
||||
- id: blacken-docs
|
||||
name: black-docs-text
|
||||
alias: black
|
||||
types_or: [rst, markdown, tex]
|
||||
additional_dependencies: [black==23.10.1]
|
||||
# Using PEP 8's line length in docs prevents excess left/right scrolling
|
||||
args: [--line-length=79]
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: lint-staged
|
||||
name: Run lint-staged for TS files
|
||||
entry: pnpm -w exec lint-staged
|
||||
language: system
|
||||
pass_filenames: false
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.2.6
|
||||
hooks:
|
||||
- id: codespell
|
||||
additional_dependencies: [tomli]
|
||||
exclude: ^(uv.lock|docs|ts|examples|pnpm-lock.yaml)
|
||||
args:
|
||||
[
|
||||
"--ignore-words-list",
|
||||
"astroid,gallary,momento,narl,ot,rouge,nin,gere,te,inh,vor",
|
||||
]
|
||||
- repo: https://github.com/srstevenson/nb-clean
|
||||
rev: 3.1.0
|
||||
hooks:
|
||||
- id: nb-clean
|
||||
args: [--preserve-cell-outputs, --remove-empty-cells]
|
||||
- repo: https://github.com/pappasam/toml-sort
|
||||
rev: v0.23.1
|
||||
hooks:
|
||||
- id: toml-sort-fix
|
||||
exclude: ".*uv.lock"
|
||||
|
||||
exclude: ^(.github/ISSUE_TEMPLATE|ts/llama_cloud_services/src/client|pnpm-lock.yaml)
|
||||
@@ -0,0 +1,33 @@
|
||||
# Python
|
||||
|
||||
## Installation
|
||||
|
||||
This project uses uv. Create a virtual environment, and run `uv sync`
|
||||
|
||||
## Versioning (Maintainers only)
|
||||
|
||||
Before merging your changes, make sure to bump the versions.
|
||||
|
||||
Make a version bump to `pyproject.toml`. If the underlying dependency on the llamacloud platform OpenAPI
|
||||
sdk needs bumping, make sure to bring that in as well. If updating dependencies, run `uv lock`.
|
||||
|
||||
The legacy `llama_parse` package re-exports some of `llama_cloud_services` in the old namespace. The
|
||||
versions need to be kept consistent to sidecar it with `llama_cloud_services`. Bump its version in `llama_parse/pyproject.toml`, and also bump its dependency version of `llama-cloud-services` to match.
|
||||
|
||||
**Note**: Don't worry about updating the `llama_parse/poetry.lock` file when bumping versions. The GitHub action will automatically run `poetry lock` for the llama_parse package during the build process (though it doesn't commit the updated lockfile back to the repo).
|
||||
|
||||
You can also do this with `./scripts/version-bump.py set 0.x.x` if you have `uv` installed.
|
||||
|
||||
Once the change is merged, push a tag `git tag -a v0.x.x -m 0.x.x` and `git push origin v0.x.x`.
|
||||
|
||||
This tagging step can be done with `./scripts/version-bump.py tag`.
|
||||
|
||||
# Typescript
|
||||
|
||||
## Installation
|
||||
|
||||
...
|
||||
|
||||
## Versioning
|
||||
|
||||
...
|
||||
@@ -1,61 +1,15 @@
|
||||
# LlamaParser (Preview)
|
||||
[](https://pypi.org/project/llama-cloud-services/)
|
||||
[](https://github.com/run-llama/llama_cloud_services/graphs/contributors)
|
||||
[](https://discord.gg/dGcwcsnxhU)
|
||||
|
||||
LlamaParser is an API to efficiently parse and represent files for downstream retrieval and context augmentation in your LLM / RAG application.
|
||||
# Llama Cloud Services
|
||||
|
||||
LlamaParser directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index).
|
||||
|
||||
Currently available in preview mode for **free**. Try it out today!
|
||||
|
||||
**NOTE:** Currently, only PDF files are supported.
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, login and get an api-key from `https://cloud.llamaindex.ai`.
|
||||
|
||||
Install the package:
|
||||
|
||||
`pip install llama-parser`
|
||||
|
||||
Then, you can run the following to parse your first PDF file:
|
||||
|
||||
```python
|
||||
from llama_parser import LlamaParser
|
||||
|
||||
parser = LlamaParser(
|
||||
api_key="...", # can also be set in your env as LLAMA_CLOUD_API_KEY
|
||||
result_type="markdown" # "markdown" and "text" are available
|
||||
)
|
||||
|
||||
# sync
|
||||
documents = parser.load_data("./my_file.pdf")
|
||||
|
||||
# async
|
||||
documents = await parser.aload_data("./my_file.pdf")
|
||||
```
|
||||
|
||||
## Using with `SimpleDirectoryReader`
|
||||
|
||||
You can also integrate the parser as the default PDF loader in `SimpleDirectoryReader`:
|
||||
|
||||
```python
|
||||
from llama_parser import LlamaParser
|
||||
from llama_index import SimpleDirectoryReader
|
||||
|
||||
parser = LlamaParser(
|
||||
api_key="...", # can also be set in your env as LLAMA_CLOUD_API_KEY
|
||||
result_type="markdown" # "markdown" and "text" are available
|
||||
)
|
||||
|
||||
file_extractor = {".pdf": parser}
|
||||
documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()
|
||||
```
|
||||
|
||||
Full documentation for `SimpleDirectoryReader` can be found on the [LlamaIndex Documentation](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader.html).
|
||||
|
||||
## Examples
|
||||
|
||||
Several end-to-end indexing examples can be found in the examples folder
|
||||
|
||||
- [Getting Started](examples/demo_basic.ipynb)
|
||||
- [Advanced RAG Example](examples/demo_advanced.ipynb)
|
||||
- [Raw API Usage](examples/demo_api.ipynb)
|
||||
> **⚠️ DEPRECATION NOTICE**
|
||||
>
|
||||
> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
|
||||
>
|
||||
> **Please migrate to the new packages:**
|
||||
> - **Python**: `pip install "llama-cloud>=1.0"` ([GitHub](https://github.com/run-llama/llama-cloud-py))
|
||||
> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
|
||||
>
|
||||
> The new packages provide the same functionality with improved performance, better support, and active development.
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
# LlamaCloud Services Examples - TypeScript
|
||||
|
||||
In this folder you will find several TypeScript end-to-end applications that contain examples regarding:
|
||||
|
||||
- [LlamaParse](./parse/)
|
||||
- [LlamaCloud Index](./index/)
|
||||
|
||||
Follow the instructions in each example folder to get started!
|
||||
@@ -0,0 +1,21 @@
|
||||
node_modules
|
||||
package-lock.json
|
||||
yarn.lock
|
||||
|
||||
.DS_Store
|
||||
.cache
|
||||
.env
|
||||
.vercel
|
||||
.output
|
||||
.nitro
|
||||
/build/
|
||||
/api/
|
||||
/server/build
|
||||
/public/build
# Sentry Config File
|
||||
.env.sentry-build-plugin
|
||||
/test-results/
|
||||
/playwright-report/
|
||||
/blob-report/
|
||||
/playwright/.cache/
|
||||
.tanstack
|
||||
.vscode
|
||||
@@ -0,0 +1,4 @@
|
||||
**/build
|
||||
**/public
|
||||
pnpm-lock.yaml
|
||||
routeTree.gen.ts
|
||||
@@ -0,0 +1,88 @@
|
||||
# LlamaClassify Demo
|
||||
|
||||
A TypeScript demo application showcasing the power of **LlamaClassify** - an agentic document classification service from [LlamaCloud](https://cloud.llamaindex.ai). This demo allows you to classify financial documents into three different types (Cash Flow Statement, Income Statement and Balance Sheet).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Start the Demo](#start-the-demo)
|
||||
- [How It Works](#how-it-works)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [License](#license)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Features
|
||||
|
||||
- 📄 **Document Classification**: Classify files based on well-defined rules you can customize and play around with.
|
||||
- 🤖 **Reasoning-based Actionable Insights**: Get in-depth, reasoning based insights on the document classification, accompanied by confidence scores.
|
||||
- 🎨 **Beautiful UI**: [DaisyUI](https://daisyui.com)-based interface powered by [TanStack](https://tanstack.com)
|
||||
- ⚡ **Fast Development**: Hot reload support with development mode
|
||||
- 🛠️ **TypeScript**: Full TypeScript support with strict type checking
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js (version 22 or higher)
|
||||
- pnpm package manager
|
||||
- LlamaCloud API key
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/classify/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
3. Set up your environment variables:
|
||||
|
||||
```bash
|
||||
# Add your API key to your environment
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Demo
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The application will be up and running on http://localhost:3000
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Document Input**: Upload your PDF document using the file picker in the UI
|
||||
2. **Parsing**: LlamaClassify, based on the rules you can find [here](./src/utils/classifier.ts), processes the document and classifies it
|
||||
3. **Results**: The classification outcome, as well as the reasoning behind it and the confidence score, are displayed in the UI.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Module Resolution Errors**: Ensure you're using Node.js 22+ and have all dependencies installed
|
||||
2. **API Key Issues**: Verify your LlamaCloud API key is correctly set
|
||||
3. **File Path Errors**: Use absolute paths or ensure relative paths are correct from the project root
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `npm run build` to make sure the project builds and type-checks
|
||||
5. Submit a pull request
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"name": "tanstack-start-example-basic",
|
||||
"private": true,
|
||||
"sideEffects": false,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite dev",
|
||||
"build": "vite build && tsc --noEmit",
|
||||
"start": "node .output/server/index.mjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tanstack/react-router": "^1.133.22",
|
||||
"@tanstack/react-router-devtools": "^1.133.22",
|
||||
"@tanstack/react-start": "^1.133.22",
|
||||
"llama-cloud-services": "file:../../ts/llama_cloud_services",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"tailwind-merge": "^2.6.0",
|
||||
"zod": "^3.24.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4.1.15",
|
||||
"@types/node": "^22.5.4",
|
||||
"@types/react": "^19.0.8",
|
||||
"@types/react-dom": "^19.0.3",
|
||||
"@vitejs/plugin-react": "^4.6.0",
|
||||
"daisyui": "^5.3.7",
|
||||
"postcss": "^8.5.1",
|
||||
"tailwindcss": "^4.1.15",
|
||||
"typescript": "^5.7.2",
|
||||
"vite": "^7.1.7",
|
||||
"vite-tsconfig-paths": "^5.1.4"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
export default {
|
||||
plugins: {
|
||||
'@tailwindcss/postcss': {},
|
||||
},
|
||||
}
|
||||
|
After Width: | Height: | Size: 3.3 KiB |
|
After Width: | Height: | Size: 21 KiB |
|
After Width: | Height: | Size: 3.8 KiB |
|
After Width: | Height: | Size: 862 B |
|
After Width: | Height: | Size: 1.1 KiB |
|
After Width: | Height: | Size: 1.1 KiB |
|
After Width: | Height: | Size: 2.0 KiB |
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"icons": [
|
||||
{
|
||||
"src": "/android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
},
|
||||
{
|
||||
"src": "/android-chrome-512x512.png",
|
||||
"sizes": "512x512",
|
||||
"type": "image/png"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
import {
|
||||
ErrorComponent,
|
||||
Link,
|
||||
rootRouteId,
|
||||
useMatch,
|
||||
useRouter,
|
||||
} from '@tanstack/react-router'
|
||||
import type { ErrorComponentProps } from '@tanstack/react-router'
|
||||
|
||||
/**
 * Default error boundary UI for the router.
 *
 * Logs the error to the console, renders TanStack Router's stock
 * `ErrorComponent`, and offers two actions: "Try Again" (invalidates the
 * router) and a navigation link ("Home" when the matched route is the root
 * route, otherwise "Go Back" through the browser history).
 */
export function DefaultCatchBoundary({ error }: ErrorComponentProps) {
  const router = useRouter()
  // `strict: false` lets this run from any route; the selector reduces the
  // match to a single boolean.
  const isRoot = useMatch({
    strict: false,
    select: (state) => state.id === rootRouteId,
  })

  console.error('DefaultCatchBoundary Error:', error)

  // Both the button and the links share the exact same Tailwind classes.
  const actionClassName = `px-2 py-1 bg-gray-600 dark:bg-gray-700 rounded-sm text-white uppercase font-extrabold`

  let navigationLink
  if (isRoot) {
    navigationLink = (
      <Link to="/" className={actionClassName}>
        Home
      </Link>
    )
  } else {
    navigationLink = (
      <Link
        to="/"
        className={actionClassName}
        onClick={(e) => {
          // Override the link's own navigation: go one step back instead.
          e.preventDefault()
          window.history.back()
        }}
      >
        Go Back
      </Link>
    )
  }

  return (
    <div className="min-w-0 flex-1 p-4 flex flex-col items-center justify-center gap-6">
      <ErrorComponent error={error} />
      <div className="flex gap-2 items-center flex-wrap">
        <button
          onClick={() => {
            router.invalidate()
          }}
          className={actionClassName}
        >
          Try Again
        </button>
        {navigationLink}
      </div>
    </div>
  )
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { Link } from '@tanstack/react-router'
|
||||
|
||||
/**
 * Generic "not found" screen.
 *
 * Renders `children` when provided (any truthy value), otherwise a default
 * message, followed by a "Go back" button (browser history) and a
 * "Start Over" link to the index route.
 */
export function NotFound({ children }: { children?: any }) {
  const message = children || <p>The page you are looking for does not exist.</p>
  const goBack = () => window.history.back()

  return (
    <div className="space-y-2 p-2">
      <div className="text-gray-600 dark:text-gray-400">{message}</div>
      <p className="flex items-center gap-2 flex-wrap">
        <button
          onClick={goBack}
          className="bg-emerald-500 text-white px-2 py-1 rounded-sm uppercase font-black text-sm"
        >
          Go back
        </button>
        <Link
          to="/"
          className="bg-cyan-600 text-white px-2 py-1 rounded-sm uppercase font-black text-sm"
        >
          Start Over
        </Link>
      </p>
    </div>
  )
}
|
||||
@@ -0,0 +1,225 @@
|
||||
/* eslint-disable */
|
||||
|
||||
// @ts-nocheck
|
||||
|
||||
// noinspection JSUnusedGlobalSymbols
|
||||
|
||||
// This file was automatically generated by TanStack Router.
|
||||
// You should NOT make any changes in this file as it will be overwritten.
|
||||
// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
|
||||
|
||||
import { Route as rootRouteImport } from './routes/__root'
|
||||
import { Route as UsersRouteImport } from './routes/users'
|
||||
import { Route as IndexRouteImport } from './routes/index'
|
||||
import { Route as UsersIndexRouteImport } from './routes/users.index'
|
||||
import { Route as PostsIndexRouteImport } from './routes/posts.index'
|
||||
import { Route as UsersUserIdRouteImport } from './routes/users.$userId'
|
||||
import { Route as PostsPostIdRouteImport } from './routes/posts.$postId'
|
||||
import { Route as ApiClassifyRouteImport } from './routes/api/classify'
|
||||
import { Route as PostsPostIdDeepRouteImport } from './routes/posts_.$postId.deep'
|
||||
|
||||
const UsersRoute = UsersRouteImport.update({
|
||||
id: '/users',
|
||||
path: '/users',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
const IndexRoute = IndexRouteImport.update({
|
||||
id: '/',
|
||||
path: '/',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
const UsersIndexRoute = UsersIndexRouteImport.update({
|
||||
id: '/',
|
||||
path: '/',
|
||||
getParentRoute: () => UsersRoute,
|
||||
} as any)
|
||||
const PostsIndexRoute = PostsIndexRouteImport.update({
|
||||
id: '/posts/',
|
||||
path: '/posts/',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
const UsersUserIdRoute = UsersUserIdRouteImport.update({
|
||||
id: '/$userId',
|
||||
path: '/$userId',
|
||||
getParentRoute: () => UsersRoute,
|
||||
} as any)
|
||||
const PostsPostIdRoute = PostsPostIdRouteImport.update({
|
||||
id: '/posts/$postId',
|
||||
path: '/posts/$postId',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
const ApiClassifyRoute = ApiClassifyRouteImport.update({
|
||||
id: '/api/classify',
|
||||
path: '/api/classify',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
const PostsPostIdDeepRoute = PostsPostIdDeepRouteImport.update({
|
||||
id: '/posts_/$postId/deep',
|
||||
path: '/posts/$postId/deep',
|
||||
getParentRoute: () => rootRouteImport,
|
||||
} as any)
|
||||
|
||||
export interface FileRoutesByFullPath {
|
||||
'/': typeof IndexRoute
|
||||
'/users': typeof UsersRouteWithChildren
|
||||
'/api/classify': typeof ApiClassifyRoute
|
||||
'/posts/$postId': typeof PostsPostIdRoute
|
||||
'/users/$userId': typeof UsersUserIdRoute
|
||||
'/posts': typeof PostsIndexRoute
|
||||
'/users/': typeof UsersIndexRoute
|
||||
'/posts/$postId/deep': typeof PostsPostIdDeepRoute
|
||||
}
|
||||
export interface FileRoutesByTo {
|
||||
'/': typeof IndexRoute
|
||||
'/api/classify': typeof ApiClassifyRoute
|
||||
'/posts/$postId': typeof PostsPostIdRoute
|
||||
'/users/$userId': typeof UsersUserIdRoute
|
||||
'/posts': typeof PostsIndexRoute
|
||||
'/users': typeof UsersIndexRoute
|
||||
'/posts/$postId/deep': typeof PostsPostIdDeepRoute
|
||||
}
|
||||
export interface FileRoutesById {
|
||||
__root__: typeof rootRouteImport
|
||||
'/': typeof IndexRoute
|
||||
'/users': typeof UsersRouteWithChildren
|
||||
'/api/classify': typeof ApiClassifyRoute
|
||||
'/posts/$postId': typeof PostsPostIdRoute
|
||||
'/users/$userId': typeof UsersUserIdRoute
|
||||
'/posts/': typeof PostsIndexRoute
|
||||
'/users/': typeof UsersIndexRoute
|
||||
'/posts_/$postId/deep': typeof PostsPostIdDeepRoute
|
||||
}
|
||||
export interface FileRouteTypes {
|
||||
fileRoutesByFullPath: FileRoutesByFullPath
|
||||
fullPaths:
|
||||
| '/'
|
||||
| '/users'
|
||||
| '/api/classify'
|
||||
| '/posts/$postId'
|
||||
| '/users/$userId'
|
||||
| '/posts'
|
||||
| '/users/'
|
||||
| '/posts/$postId/deep'
|
||||
fileRoutesByTo: FileRoutesByTo
|
||||
to:
|
||||
| '/'
|
||||
| '/api/classify'
|
||||
| '/posts/$postId'
|
||||
| '/users/$userId'
|
||||
| '/posts'
|
||||
| '/users'
|
||||
| '/posts/$postId/deep'
|
||||
id:
|
||||
| '__root__'
|
||||
| '/'
|
||||
| '/users'
|
||||
| '/api/classify'
|
||||
| '/posts/$postId'
|
||||
| '/users/$userId'
|
||||
| '/posts/'
|
||||
| '/users/'
|
||||
| '/posts_/$postId/deep'
|
||||
fileRoutesById: FileRoutesById
|
||||
}
|
||||
export interface RootRouteChildren {
|
||||
IndexRoute: typeof IndexRoute
|
||||
UsersRoute: typeof UsersRouteWithChildren
|
||||
ApiClassifyRoute: typeof ApiClassifyRoute
|
||||
PostsPostIdRoute: typeof PostsPostIdRoute
|
||||
PostsIndexRoute: typeof PostsIndexRoute
|
||||
PostsPostIdDeepRoute: typeof PostsPostIdDeepRoute
|
||||
}
|
||||
|
||||
declare module '@tanstack/react-router' {
|
||||
interface FileRoutesByPath {
|
||||
'/users': {
|
||||
id: '/users'
|
||||
path: '/users'
|
||||
fullPath: '/users'
|
||||
preLoaderRoute: typeof UsersRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
'/': {
|
||||
id: '/'
|
||||
path: '/'
|
||||
fullPath: '/'
|
||||
preLoaderRoute: typeof IndexRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
'/users/': {
|
||||
id: '/users/'
|
||||
path: '/'
|
||||
fullPath: '/users/'
|
||||
preLoaderRoute: typeof UsersIndexRouteImport
|
||||
parentRoute: typeof UsersRoute
|
||||
}
|
||||
'/posts/': {
|
||||
id: '/posts/'
|
||||
path: '/posts'
|
||||
fullPath: '/posts'
|
||||
preLoaderRoute: typeof PostsIndexRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
'/users/$userId': {
|
||||
id: '/users/$userId'
|
||||
path: '/$userId'
|
||||
fullPath: '/users/$userId'
|
||||
preLoaderRoute: typeof UsersUserIdRouteImport
|
||||
parentRoute: typeof UsersRoute
|
||||
}
|
||||
'/posts/$postId': {
|
||||
id: '/posts/$postId'
|
||||
path: '/posts/$postId'
|
||||
fullPath: '/posts/$postId'
|
||||
preLoaderRoute: typeof PostsPostIdRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
'/api/classify': {
|
||||
id: '/api/classify'
|
||||
path: '/api/classify'
|
||||
fullPath: '/api/classify'
|
||||
preLoaderRoute: typeof ApiClassifyRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
'/posts_/$postId/deep': {
|
||||
id: '/posts_/$postId/deep'
|
||||
path: '/posts/$postId/deep'
|
||||
fullPath: '/posts/$postId/deep'
|
||||
preLoaderRoute: typeof PostsPostIdDeepRouteImport
|
||||
parentRoute: typeof rootRouteImport
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interface UsersRouteChildren {
|
||||
UsersUserIdRoute: typeof UsersUserIdRoute
|
||||
UsersIndexRoute: typeof UsersIndexRoute
|
||||
}
|
||||
|
||||
const UsersRouteChildren: UsersRouteChildren = {
|
||||
UsersUserIdRoute: UsersUserIdRoute,
|
||||
UsersIndexRoute: UsersIndexRoute,
|
||||
}
|
||||
|
||||
const UsersRouteWithChildren = UsersRoute._addFileChildren(UsersRouteChildren)
|
||||
|
||||
const rootRouteChildren: RootRouteChildren = {
|
||||
IndexRoute: IndexRoute,
|
||||
UsersRoute: UsersRouteWithChildren,
|
||||
ApiClassifyRoute: ApiClassifyRoute,
|
||||
PostsPostIdRoute: PostsPostIdRoute,
|
||||
PostsIndexRoute: PostsIndexRoute,
|
||||
PostsPostIdDeepRoute: PostsPostIdDeepRoute,
|
||||
}
|
||||
export const routeTree = rootRouteImport
|
||||
._addFileChildren(rootRouteChildren)
|
||||
._addFileTypes<FileRouteTypes>()
|
||||
|
||||
import type { getRouter } from './router.tsx'
|
||||
import type { createStart } from '@tanstack/react-start'
|
||||
declare module '@tanstack/react-start' {
|
||||
interface Register {
|
||||
ssr: true
|
||||
router: Awaited<ReturnType<typeof getRouter>>
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
import { createRouter } from '@tanstack/react-router'
|
||||
import { routeTree } from './routeTree.gen'
|
||||
import { DefaultCatchBoundary } from './components/DefaultCatchBoundary'
|
||||
import { NotFound } from './components/NotFound'
|
||||
|
||||
/**
 * Builds the application's router from the generated route tree.
 *
 * Configures preloading on intent, scroll restoration, and the shared
 * error / not-found fallback components.
 */
export function getRouter() {
  return createRouter({
    routeTree,
    scrollRestoration: true,
    defaultPreload: 'intent',
    defaultNotFoundComponent: () => <NotFound />,
    defaultErrorComponent: DefaultCatchBoundary,
  })
}
|
||||
@@ -0,0 +1,128 @@
|
||||
/// <reference types="vite/client" />
|
||||
import {
|
||||
HeadContent,
|
||||
Scripts,
|
||||
createRootRoute,
|
||||
} from '@tanstack/react-router'
|
||||
import * as React from 'react'
|
||||
import { DefaultCatchBoundary } from '~/components/DefaultCatchBoundary'
|
||||
import { NotFound } from '~/components/NotFound'
|
||||
import { seo } from '~/utils/seo'
|
||||
|
||||
/**
 * Root route of the application.
 *
 * Declares the document head (meta tags, stylesheet/icon/manifest links and
 * external scripts), the error and not-found fallbacks, and the HTML shell
 * (`RootDocument`) that wraps every page.
 */
export const Route = createRootRoute({
  head: () => ({
    meta: [
      {
        charSet: 'utf-8',
      },
      {
        name: 'viewport',
        content: 'width=device-width, initial-scale=1',
      },
      ...seo({
        title: 'Financial Documents Classification Agent',
        description:
          'Classify financial documents as balance sheets, income statements and cash flow statements.',
      }),
    ],
    links: [
      // daisyUI component styles, loaded from the CDN.
      { rel: 'stylesheet', href: 'https://cdn.jsdelivr.net/npm/daisyui@5' },
      {
        rel: 'apple-touch-icon',
        sizes: '180x180',
        href: '/apple-touch-icon.png',
      },
      {
        rel: 'icon',
        type: 'image/png',
        sizes: '32x32',
        href: '/favicon-32x32.png',
      },
      {
        rel: 'icon',
        type: 'image/png',
        sizes: '16x16',
        href: '/favicon-16x16.png',
      },
      // Six-digit hex colour (was the invalid five-digit '#fffff').
      { rel: 'manifest', href: '/site.webmanifest', color: '#ffffff' },
      { rel: 'icon', href: '/favicon.ico' },
    ],
    scripts: [
      {
        src: '/customScript.js',
        type: 'text/javascript',
      },
      // Tailwind's in-browser build, loaded from the CDN.
      {
        src: 'https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4',
        type: 'text/javascript',
      },
    ],
  }),
  errorComponent: DefaultCatchBoundary,
  notFoundComponent: () => <NotFound />,
  shellComponent: RootDocument,
})
|
||||
|
||||
/**
 * HTML shell rendered around every route.
 *
 * Outputs the `<head>` content collected by the router, a daisyUI navbar
 * (dropdown menu, centered title, link to the example's source on GitHub),
 * the routed page (`children`) and the router's scripts.
 *
 * @param children The routed page content to render below the navbar.
 */
function RootDocument({ children }: { children: React.ReactNode }) {
  return (
    <html lang="en">
      <head>
        <HeadContent />
      </head>
      <body>
        <div className="navbar bg-base-100 shadow-sm">
          <div className="navbar-start">
            <div className="dropdown">
              <div
                tabIndex={0}
                role="button"
                aria-label="Open menu"
                className="btn btn-ghost btn-circle"
              >
                <svg
                  xmlns="http://www.w3.org/2000/svg"
                  className="h-5 w-5"
                  fill="none"
                  viewBox="0 0 24 24"
                  stroke="currentColor"
                  aria-hidden="true"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth="2"
                    d="M4 6h16M4 12h16M4 18h7"
                  />
                </svg>
              </div>
              <ul
                tabIndex={0}
                className="menu menu-lg dropdown-content bg-base-100 rounded-box z-1 mt-3 w-80 p-2 shadow"
              >
                <li><a href="/">Home</a></li>
                <li><a href="https://cloud.llamaindex.ai">Get Started with LlamaCloud</a></li>
                <li><a href="https://developers.llamaindex.ai/python/cloud/llamaclassify/getting_started/">LlamaClassify Docs</a></li>
              </ul>
            </div>
          </div>
          <div className="navbar-center">
            <a className="btn btn-ghost text-xl" href="/">Financial Documents Classification Agent</a>
          </div>
          <div className="navbar-end">
            {/*
              The anchor itself carries the button styling: nesting a <button>
              inside an <a> is invalid HTML. The URL uses GitHub's
              /tree/<branch>/<path> layout (the previous /main/blob/ order
              pointed at a non-existent page).
            */}
            <a
              className="btn btn-ghost btn-circle"
              href="https://github.com/run-llama/llama_cloud_services/tree/main/examples-ts/classify"
              aria-label="View the source on GitHub"
            >
              <div className="indicator">
                <svg
                  xmlns="http://www.w3.org/2000/svg"
                  className="h-10 w-10"
                  fill="currentColor"
                  viewBox="0 0 640 512"
                  aria-hidden="true"
                >
                  <path d="M237.9 461.4C237.9 463.4 235.6 465 232.7 465C229.4 465.3 227.1 463.7 227.1 461.4C227.1 459.4 229.4 457.8 232.3 457.8C235.3 457.5 237.9 459.1 237.9 461.4zM206.8 456.9C206.1 458.9 208.1 461.2 211.1 461.8C213.7 462.8 216.7 461.8 217.3 459.8C217.9 457.8 216 455.5 213 454.6C210.4 453.9 207.5 454.9 206.8 456.9zM251 455.2C248.1 455.9 246.1 457.8 246.4 460.1C246.7 462.1 249.3 463.4 252.3 462.7C255.2 462 257.2 460.1 256.9 458.1C256.6 456.2 253.9 454.9 251 455.2zM316.8 72C178.1 72 72 177.3 72 316C72 426.9 141.8 521.8 241.5 555.2C254.3 557.5 258.8 549.6 258.8 543.1C258.8 536.9 258.5 502.7 258.5 481.7C258.5 481.7 188.5 496.7 173.8 451.9C173.8 451.9 162.4 422.8 146 415.3C146 415.3 123.1 399.6 147.6 399.9C147.6 399.9 172.5 401.9 186.2 425.7C208.1 464.3 244.8 453.2 259.1 446.6C261.4 430.6 267.9 419.5 275.1 412.9C219.2 406.7 162.8 398.6 162.8 302.4C162.8 274.9 170.4 261.1 186.4 243.5C183.8 237 175.3 210.2 189 175.6C209.9 169.1 258 202.6 258 202.6C278 197 299.5 194.1 320.8 194.1C342.1 194.1 363.6 197 383.6 202.6C383.6 202.6 431.7 169 452.6 175.6C466.3 210.3 457.8 237 455.2 243.5C471.2 261.2 481 275 481 302.4C481 398.9 422.1 406.6 366.2 412.9C375.4 420.8 383.2 435.8 383.2 459.3C383.2 493 382.9 534.7 382.9 542.9C382.9 549.4 387.5 557.3 400.2 555C500.2 521.8 568 426.9 568 316C568 177.3 455.5 72 316.8 72zM169.2 416.9C167.9 417.9 168.2 420.2 169.9 422.1C171.5 423.7 173.8 424.4 175.1 423.1C176.4 422.1 176.1 419.8 174.4 417.9C172.8 416.3 170.5 415.6 169.2 416.9zM158.4 408.8C157.7 410.1 158.7 411.7 160.7 412.7C162.3 413.7 164.3 413.4 165 412C165.7 410.7 164.7 409.1 162.7 408.1C160.7 407.5 159.1 407.8 158.4 408.8zM190.8 444.4C189.2 445.7 189.8 448.7 192.1 450.6C194.4 452.9 197.3 453.2 198.6 451.6C199.9 450.3 199.3 447.3 197.3 445.4C195.1 443.1 192.1 442.8 190.8 444.4zM179.4 429.7C177.8 430.7 177.8 433.3 179.4 435.6C181 437.9 183.7 438.9 185 437.9C186.6 436.6 186.6 434 185 431.7C183.6 429.4 181 428.4 179.4 429.7z" />
                </svg>
              </div>
            </a>
          </div>
        </div>
        <hr />
        {children}
        <Scripts />
      </body>
    </html>
  )
}
|
||||
@@ -0,0 +1,45 @@
|
||||
import { createFileRoute } from '@tanstack/react-router'
|
||||
import { classifier, classificationRules, parsingConfig } from '~/utils/classifier'
|
||||
|
||||
/**
 * Escapes the HTML-significant characters of `value` so it can be embedded
 * in an HTML fragment as plain text.
 */
function escapeHtml(value: unknown): string {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
}

/** Builds a JSON response carrying the `{ result }` payload the client reads. */
function jsonResponse(payload: { result: string }, status = 200): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { 'Content-Type': 'application/json' },
  })
}

/**
 * `/api/classify` server route.
 *
 * POST expects multipart form data with a `file` field. The file is sent to
 * the classifier and the response body is JSON of the form
 * `{ "result": string }`, where `result` is an HTML fragment with one card
 * per classified item (or a plain message on failure).
 *
 * Status codes: 200 on success, 400 when no file was uploaded, 500 when the
 * classification call throws.
 */
export const Route = createFileRoute('/api/classify')({
  component: RouteComponent,
  server: {
    handlers: {
      POST: async ({ request }) => {
        const body = await request.formData()
        const fl = body.get('file')
        // A form field can also be a plain string; only accept real uploads.
        if (!fl || typeof fl === 'string') {
          return jsonResponse({ result: 'you need to provide a file' }, 400)
        }

        let rawRes
        try {
          const buff = await fl.arrayBuffer()
          rawRes = await classifier.classify(
            classificationRules,
            parsingConfig,
            { fileContents: [new Uint8Array(buff)] },
          )
        } catch (error) {
          // Always answer with JSON so the client's `res.json()` succeeds.
          console.error('Classification failed:', error)
          return jsonResponse(
            { result: 'Classification failed, please try again.' },
            500,
          )
        }

        let classification = ''
        for (const result of rawRes.items) {
          if ('result' in result && result.result) {
            // Round to one decimal to avoid floating-point noise such as
            // 56.99999999999999%.
            const confidencePct =
              Math.round(result.result.confidence * 1000) / 10
            // The file name and the model output are untrusted text: escape
            // them before embedding them in markup.
            classification += `
              <div class="card bg-base-100 shadow-xl p-6 mb-4">
                <div class="space-y-3">
                  <p><span class="font-semibold">📄 Document:</span> ${escapeHtml(fl.name)}</p>
                  <p><span class="font-semibold">🏷️ Type:</span> <span class="badge badge-primary">${escapeHtml(result.result.type)}</span></p>
                  <p><span class="font-semibold">📊 Confidence:</span> ${confidencePct}%</p>
                  <p><span class="font-semibold">💭 Reasoning:</span> ${escapeHtml(result.result.reasoning)}</p>
                </div>
              </div>
            `
          }
        }
        return jsonResponse({ result: classification })
      },
    },
  },
})

/** This route only serves the POST handler; it renders nothing. */
function RouteComponent() {
  return null
}
|
||||
@@ -0,0 +1,99 @@
|
||||
import { createFileRoute } from '@tanstack/react-router'
|
||||
import { useRef, useState } from 'react'
|
||||
|
||||
export const Route = createFileRoute('/')({
|
||||
component: Home,
|
||||
})
|
||||
|
||||
/**
 * Landing page: lets the user pick a PDF, POSTs it to `/api/classify` as
 * multipart form data and renders the `result` string of the JSON response.
 */
function Home() {
  const [file, setFile] = useState<null | File>(null)
  const fileInputRef = useRef<HTMLInputElement>(null)
  const [reply, setReply] = useState<null | string>(null)
  const [loading, setLoading] = useState<boolean>(false)

  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const selectedFile = event.target.files?.[0]
    if (selectedFile) {
      setFile(selectedFile)
      // A result shown for the previous file would be misleading next to the
      // newly selected file name.
      setReply(null)
    }
  }

  const handleClearFile = () => {
    setFile(null)
    setReply(null)
    // Resetting the native input lets the same file be re-selected afterwards.
    if (fileInputRef.current) {
      fileInputRef.current.value = ''
    }
  }

  const handleClassify = async () => {
    // Ignore clicks without a file or while a request is already in flight.
    if (!file || loading) return

    setReply(null)
    setLoading(true)
    try {
      const formData = new FormData()
      formData.append('file', file)

      const res = await fetch('/api/classify', {
        method: 'POST',
        body: formData,
      })

      const data = await res.json()
      if (typeof data?.result !== 'string') {
        throw new Error(`Unexpected response (HTTP ${res.status})`)
      }
      setReply(data.result)
    } catch (error) {
      console.error('Error:', error)
      // Surface the failure instead of silently showing nothing.
      setReply(
        '<div class="alert alert-error">Something went wrong while classifying the document. Please try again.</div>',
      )
    } finally {
      setLoading(false)
    }
  }

  return (
    <div className="flex flex-col justify-center items-center gap-y-8">
      <br />
      <h1 className="text-xl font-bold text-gray-700">AI-Powered financial document classification</h1>
      <h2 className="text-lg font-semibold text-gray-500">Need help sorting out the financial documents jungle? Let our classification agent handle it!</h2>
      <fieldset className="fieldset bg-base-100 border-base-300 rounded-box w-200 border p-4">
        <legend className="fieldset-legend text-lg">Upload your financial document here</legend>
        <label className="label flex justify-center">
          <input type="file" className="file-input" onChange={handleFileChange} accept='application/pdf' ref={fileInputRef} />
        </label>
      </fieldset>
      {file && (
        <div className="flex flex-col justify-center items-center gap-y-8">
          <p className="text-sm text-gray-600">Selected file: {file.name}</p>
          <div className='grid grid-cols-2 gap-x-6'>
            <button
              type="button"
              className='btn bg-gray-500 text-white shadow-lg hover:bg-gray-600 hover:shadow-xl rounded'
              onClick={handleClassify}
              disabled={loading}
            >
              Classify
            </button>
            <button
              onClick={handleClearFile}
              type="button"
              className="px-4 py-2 bg-red-300 text-black rounded hover:bg-red-400 hover:shadow-xl shadow-lg"
            >
              Clear
            </button>
          </div>
        </div>
      )}
      {loading && (
        <span className="loading loading-spinner text-primary"></span>
      )}
      {reply && (
        // NOTE(review): `reply` is injected as raw HTML, so the endpoint must
        // escape any untrusted text it embeds in the snippet — confirm.
        <div
          className="max-w-2xl w-full"
          dangerouslySetInnerHTML={{ __html: reply }}
        />
      )}
    </div>
  )
}
|
||||
@@ -0,0 +1,23 @@
|
||||
import { LlamaClassify, ClassifierRule, ClassifyParsingConfiguration } from "llama-cloud-services"
|
||||
|
||||
// Shared LlamaClassify client, constructed once at module load with the
// LLAMA_CLOUD_API_KEY environment variable.
// NOTE(review): the variable may be undefined at runtime; how the client
// reacts to a missing key is not visible here — confirm.
export const classifier = new LlamaClassify(process.env.LLAMA_CLOUD_API_KEY);
|
||||
|
||||
// Candidate document types. Each rule pairs a `type` label with the
// natural-language description of documents belonging to it.
export const classificationRules: ClassifierRule[] = [
|
||||
{
|
||||
description: "Shows a company's assets, liabilities, and shareholders' equity at a specific point in time, providing a snapshot of financial position.",
|
||||
type: "balance_sheet"
|
||||
},
|
||||
{
|
||||
description: "Reports cash inflows and outflows from operating, investing, and financing activities, highlighting liquidity and cash management.",
|
||||
type: "cash_flow_statement"
|
||||
},
|
||||
{
|
||||
description: "Summarizes revenues, expenses, and profits over a period, indicating financial performance and profitability.",
|
||||
type: "income_statement"
|
||||
},
|
||||
];
|
||||
|
||||
// Parsing options passed to the classifier alongside the rules.
// NOTE(review): presumably `lang` is the document language and `max_pages`
// caps the pages parsed per document — verify against the library docs.
export const parsingConfig: ClassifyParsingConfiguration = {
|
||||
lang: "en",
|
||||
max_pages: 20,
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
 * Builds the list of head meta descriptors for a page.
 *
 * The list always contains the title entry followed by the description,
 * keywords, Twitter and Open Graph entries. The three image-related entries
 * are appended only when `image` is truthy.
 */
export const seo = ({
  title,
  description,
  keywords,
  image,
}: {
  title: string
  description?: string
  image?: string
  keywords?: string
}) => {
  // Small factory so every entry has the same `{ name, content }` shape.
  const meta = (name: string, content: string | undefined) => ({ name, content })

  const imageTags = image
    ? [
        meta('twitter:image', image),
        meta('twitter:card', 'summary_large_image'),
        meta('og:image', image),
      ]
    : []

  return [
    { title },
    meta('description', description),
    meta('keywords', keywords),
    meta('twitter:title', title),
    meta('twitter:description', description),
    meta('twitter:creator', '@tannerlinsley'),
    meta('twitter:site', '@tannerlinsley'),
    meta('og:type', 'website'),
    meta('og:title', title),
    meta('og:description', description),
    ...imageTags,
  ]
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"include": ["**/*.ts", "**/*.tsx"],
|
||||
"compilerOptions": {
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"jsx": "react-jsx",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"lib": ["DOM", "DOM.Iterable", "ES2022"],
|
||||
"isolatedModules": true,
|
||||
"resolveJsonModule": true,
|
||||
"skipLibCheck": true,
|
||||
"target": "ES2022",
|
||||
"allowJs": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"~/*": ["./src/*"]
|
||||
},
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { tanstackStart } from '@tanstack/react-start/plugin/vite'
|
||||
import { defineConfig } from 'vite'
|
||||
import tsConfigPaths from 'vite-tsconfig-paths'
|
||||
import viteReact from '@vitejs/plugin-react'
|
||||
|
||||
// Vite configuration: dev server on port 3000, with the tsconfig-paths,
// TanStack Start (sources under `src`) and React plugins, in that order.
export default defineConfig({
|
||||
server: {
|
||||
port: 3000,
|
||||
},
|
||||
plugins: [
|
||||
tsConfigPaths({
|
||||
projects: ['./tsconfig.json'],
|
||||
}),
|
||||
tanstackStart({
|
||||
srcDirectory: 'src',
|
||||
}),
|
||||
viteReact(),
|
||||
],
|
||||
})
|
||||
@@ -0,0 +1,122 @@
|
||||
# LlamaExtract Demo
|
||||
|
||||
A TypeScript demo application showcasing the power of **LlamaExtract** - a structured data extraction agentic service from [LlamaCloud](https://cloud.llamaindex.ai). This demo allows you to extract structured information from scientific papers and render it as nicely formatted markdown.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Start the Demo](#start-the-demo)
|
||||
- [Development Mode](#development-mode)
|
||||
- [Build the Project](#build-the-project)
|
||||
- [Code Quality](#code-quality)
|
||||
- [Quick Commands Reference](#quick-commands-reference)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Dependencies](#api-dependencies)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [License](#license)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Features
|
||||
|
||||
- 📄 **Structured Data Extraction**: Extract data from your files effortlessly, and structure them the way you want!
|
||||
- 🤖 **Markdown Rendering**: Generate markdown directly from your extracted data
|
||||
- 🎨 **Beautiful CLI**: Styled console interface with colors and ASCII art
|
||||
- ⚡ **Fast Development**: Hot reload support with watch mode
|
||||
- 🛠️ **TypeScript**: Full TypeScript support with strict type checking
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js (version 18 or higher)
|
||||
- npm package manager (ships with Node.js)
|
||||
- LlamaCloud API key
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/extract/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
3. Set up your environment variables:
|
||||
|
||||
```bash
|
||||
# Add your API key to your environment
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Demo
|
||||
|
||||
```bash
|
||||
npm run start
|
||||
```
|
||||
|
||||
The application will display a welcome screen and prompt you to enter the path to a document you'd like to process.
|
||||
|
||||
### Development Mode
|
||||
|
||||
For development with hot reload:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
### Build the Project
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
Format code:
|
||||
|
||||
```bash
|
||||
npm run format
|
||||
```
|
||||
|
||||
Lint code:
|
||||
|
||||
```bash
|
||||
npm run lint
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Document Input**: Enter the path to your document when prompted
|
||||
2. **Parsing**: LlamaExtract, based on the schema you can find [here](./src/schema.ts), processes the document and extracts structured data
|
||||
3. **Markdown Rendering**: The extracted content is rendered into beautiful markdown
|
||||
4. **Results**: View the results directly in your terminal
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Module Resolution Errors**: Ensure you're using Node.js 18+ and have all dependencies installed
|
||||
2. **API Key Issues**: Verify your LlamaCloud API key is correctly set
|
||||
3. **File Path Errors**: Use absolute paths or ensure relative paths are correct from the project root
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `npm run format` and `npm run lint`
|
||||
5. Submit a pull request
|
||||
@@ -0,0 +1,14 @@
|
||||
import js from "@eslint/js";
|
||||
import globals from "globals";
|
||||
import tseslint from "typescript-eslint";
|
||||
import { defineConfig } from "eslint/config";
|
||||
|
||||
// ESLint flat config: the core recommended rules for JS/TS sources, followed
// by the typescript-eslint recommended preset.
export default defineConfig([
  {
    files: ["**/*.{js,mjs,cjs,ts,mts,cts}"],
    plugins: { js },
    extends: ["js/recommended"],
    // The linted sources are a Node.js CLI (they read process.env and
    // process.stdin), so expose Node globals rather than browser ones.
    languageOptions: { globals: globals.node },
  },
  tseslint.configs.recommended,
]);
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "llama-extract-demo",
|
||||
"version": "0.1.0",
|
||||
"description": "Demo for LlamaExtract in TypeScript",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"There are no tests\"",
|
||||
"start": "npm exec tsx src/index.ts",
|
||||
"lint": "eslint ./src/",
|
||||
"format": "prettier --write ./src/",
|
||||
"build": "tsc",
|
||||
"dev": "npm exec tsx --watch src/index.ts"
|
||||
},
|
||||
"author": "LlamaIndex",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cli-markdown": "^3.5.1",
|
||||
"consola": "^3.4.2",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "file:../../ts/llama_cloud_services",
|
||||
"marked": "^15.0.12",
|
||||
"marked-terminal": "^7.3.0",
|
||||
"picocolors": "^1.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.32.0",
|
||||
"@types/figlet": "^1.7.0",
|
||||
"@types/marked-terminal": "^6.1.1",
|
||||
"@types/node": "^24.2.0",
|
||||
"eslint": "^9.32.0",
|
||||
"globals": "^16.3.0",
|
||||
"jiti": "^2.5.1",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.9.2",
|
||||
"typescript-eslint": "^8.39.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
import { LlamaExtract, ExtractConfig } from "llama-cloud-services";
|
||||
import cliMarkdown from "cli-markdown";
|
||||
import { logger } from "./logger";
|
||||
import pc from "picocolors";
|
||||
import { consoleInput, renderLogo } from "./utils";
|
||||
import { dataSchema } from "./schema";
|
||||
import { renderMarkdown, ResearchData } from "./markdown";
|
||||
|
||||
/**
 * Interactive entry point of the LlamaExtract demo.
 *
 * Prints the logo and welcome text, then repeatedly asks for a file path,
 * runs the extraction with `dataSchema` and prints the result as rendered
 * markdown until the user types `quit`.
 *
 * @returns 0 on a normal exit, 1 when LLAMA_CLOUD_API_KEY is not set.
 */
export async function main(): Promise<number> {
  const apiKey = process.env.LLAMA_CLOUD_API_KEY;
  if (!apiKey) {
    // Fail early with an actionable message instead of passing `undefined`
    // to the client.
    logger.error(
      "LLAMA_CLOUD_API_KEY is not set. Export it before starting the demo.",
    );
    return 1;
  }

  const extractClient = new LlamaExtract(
    apiKey,
    "https://api.cloud.llamaindex.ai",
  );
  await renderLogo();
  logger.log(
    `Welcome to ${pc.bold(
      pc.magentaBright("LlamaExtract Demo✨"),
    )}, our demo for ${pc.bold(pc.green("LlamaExtract"))}, a ${pc.bold(
      pc.cyan("LlamaCloud☁️"),
    )} (https://cloud.llamaindex.ai) product!.\nIn this demo we are going to try extracting relevant information ${pc.bold(
      pc.yellowBright("from scientific papers"),
    )}. Type the path to the paper you would like to process below👇\nIf you wish to exit, just type ${pc.bold(
      pc.gray("quit"),
    )}.\n`,
  );

  while (true) {
    const userInput = (await consoleInput()).trim();
    if (userInput.toLowerCase() === "quit") {
      break;
    }
    if (userInput === "") {
      // Nothing typed: ask again instead of sending an empty path.
      continue;
    }
    try {
      const generatedData = await extractClient.extract(
        dataSchema,
        {} as ExtractConfig,
        userInput,
      );
      const extracted = generatedData?.data;
      if (!extracted) {
        // renderMarkdown would otherwise fail while destructuring `undefined`.
        logger.error(`No data could be extracted from: ${userInput}`);
        continue;
      }
      const research = renderMarkdown(extracted as ResearchData);
      logger.log(`${pc.bold(pc.cyan("Extracted information:✨"))}:\n`);
      logger.log(cliMarkdown(research));
    } catch (error) {
      logger.error(`Error processing file: ${error}`);
    }
  }
  return 0;
}

// Propagate main()'s status as the process exit code; unexpected errors are
// printed and reported as a failure.
main()
  .then((code) => {
    process.exitCode = code;
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
|
||||
@@ -0,0 +1,8 @@
|
||||
import { createConsola } from "consola";
|
||||
import type { ConsolaInstance } from "consola";
|
||||
|
||||
// Shared consola logger for the demo, created with `formatOptions.date`
// set to false.
export const logger: ConsolaInstance = createConsola({
|
||||
formatOptions: {
|
||||
date: false,
|
||||
},
|
||||
});
|
||||
@@ -0,0 +1,172 @@
|
||||
/** One author of the paper. */
type Author = {
  name: string;
  affiliation?: string;
  email?: string;
};

/** How the research was carried out. */
type Methodology = {
  approach?: string;
  participants?: string;
  methods?: string[];
};

/** A single reported result. */
type Result = {
  finding?: string;
  significance?: string;
  supportingData?: string;
};

/** A cited work. */
type Reference = {
  title: string;
  authors: string;
  year?: string;
  relevance?: string;
};

/** Discussion section, split into its three lists. */
type Discussion = {
  implications?: string[];
  limitations?: string[];
  futureWork?: string[];
};

/** Where and when the paper was published. */
type Publication = {
  journal?: string;
  year: string;
  doi?: string;
  url?: string;
};

/** Shape of the data consumed by `renderMarkdown`. */
export type ResearchData = {
  title: string;
  authors: Author[];
  abstract: string;
  keywords?: string[];
  mainFindings: string[];
  methodology?: Methodology;
  results?: Result[];
  discussion?: Discussion;
  references?: Reference[];
  publication?: Publication;
};

/**
 * Renders research data as a markdown document.
 *
 * Title, authors, abstract and main findings are always emitted. Every other
 * section is emitted only when its data is present (and, for lists,
 * non-empty). Chunks are joined with a single newline.
 */
export function renderMarkdown(data: ResearchData): string {
  // Formats a list of strings as markdown bullets, one per line.
  const bulletList = (items: string[]): string =>
    items.map((item) => `- ${item}`).join("\n");

  const out: string[] = [];
  const emit = (...chunks: string[]): void => {
    out.push(...chunks);
  };

  emit(`# ${data.title}\n`, `## Authors`);

  // Authors: name, then optional affiliation and e-mail.
  const authorLines: string[] = [];
  for (const { name, affiliation, email } of data.authors) {
    let line = `- **${name}**`;
    if (affiliation) line += `, *${affiliation}*`;
    if (email) line += ` (${email})`;
    authorLines.push(line);
  }
  emit(authorLines.join("\n"));

  emit(`\n## Abstract\n${data.abstract}`);

  const { keywords } = data;
  if (keywords && keywords.length > 0) {
    emit(`\n## Keywords\n${bulletList(keywords)}`);
  }

  emit(`\n## Main Findings\n${bulletList(data.mainFindings)}`);

  const { methodology } = data;
  if (methodology) {
    emit(`\n## Methodology`);
    if (methodology.approach) {
      emit(`**Approach:** ${methodology.approach}`);
    }
    if (methodology.participants) {
      emit(`**Participants:** ${methodology.participants}`);
    }
    if (methodology.methods?.length) {
      emit(`**Methods:**\n${bulletList(methodology.methods)}`);
    }
  }

  const { results } = data;
  if (results?.length) {
    emit(`\n## Results`);
    for (let idx = 0; idx < results.length; idx += 1) {
      const { finding, significance, supportingData } = results[idx];
      emit(`\n### Result ${idx + 1}`);
      if (finding) emit(`- **Finding:** ${finding}`);
      if (significance) emit(`- **Significance:** ${significance}`);
      if (supportingData) emit(`- **Supporting Data:** ${supportingData}`);
    }
  }

  const { discussion } = data;
  if (discussion) {
    emit(`\n## Discussion`);
    // Heading/list pairs, in the order they appear in the output.
    const parts: Array<[string, string[] | undefined]> = [
      ["Implications", discussion.implications],
      ["Limitations", discussion.limitations],
      ["Future Work", discussion.futureWork],
    ];
    for (const [heading, entries] of parts) {
      if (entries?.length) {
        emit(`### ${heading}\n${bulletList(entries)}`);
      }
    }
  }

  const { references } = data;
  if (references?.length) {
    emit(`\n## References`);
    for (let idx = 0; idx < references.length; idx += 1) {
      const ref = references[idx];
      const yearSuffix = ref.year ? ` (${ref.year})` : "";
      emit(`\n**[${idx + 1}]** ${ref.title} — *${ref.authors}*${yearSuffix}`);
      if (ref.relevance) emit(`> ${ref.relevance}`);
    }
  }

  const { publication } = data;
  if (publication) {
    emit(`\n## Publication`);
    if (publication.journal) emit(`- **Journal:** ${publication.journal}`);
    if (publication.year) emit(`- **Year:** ${publication.year}`);
    if (publication.doi) emit(`- **DOI:** ${publication.doi}`);
    if (publication.url) {
      emit(`- **URL:** [${publication.url}](${publication.url})`);
    }
  }

  return out.join("\n");
}
|
||||
@@ -0,0 +1,169 @@
|
||||
/** Schema leaf for a described string property. */
const textField = (description: string) => ({ type: "string", description });

/** Schema node for a described array of strings. */
const textList = (description: string) => ({
  type: "array",
  description,
  items: { type: "string" },
});

/**
 * JSON-schema-style description of the data to extract from a research
 * paper. `title`, `authors`, `abstract` and `mainFindings` are required;
 * all other top-level properties are optional.
 */
export const dataSchema = {
  type: "object",
  required: ["title", "authors", "abstract", "mainFindings"],
  properties: {
    title: textField("The full title of the research paper"),
    authors: {
      type: "array",
      description: "List of all authors of the paper",
      items: {
        type: "object",
        properties: {
          name: textField("Full name of the author"),
          affiliation: textField(
            "Institution or organization the author is affiliated with",
          ),
          email: textField("Contact email of the author if provided"),
        },
      },
    },
    abstract: textField("Complete abstract or summary of the paper"),
    keywords: textList(
      "Key terms and phrases that describe the paper's main topics",
    ),
    mainFindings: textList(
      "Key findings, conclusions, or contributions of the paper",
    ),
    methodology: {
      type: "object",
      description: "Research methods and approaches used",
      properties: {
        approach: textField("Overall research approach or study design"),
        participants: textField(
          "Description of study participants or data sources",
        ),
        methods: textList("Specific methods, techniques, or tools used"),
      },
    },
    results: {
      type: "array",
      description: "Main results and outcomes of the research",
      items: {
        type: "object",
        properties: {
          finding: textField("Description of the specific result or finding"),
          significance: textField(
            "Statistical significance or importance of the finding",
          ),
          supportingData: textField(
            "Relevant statistics, measurements, or data points",
          ),
        },
      },
    },
    discussion: {
      type: "object",
      properties: {
        implications: textList(
          "Theoretical or practical implications of the findings",
        ),
        limitations: textList("Study limitations or constraints"),
        futureWork: textList("Suggested future research directions"),
      },
    },
    references: {
      type: "array",
      description:
        "Key papers cited that are crucial to understanding this work",
      items: {
        type: "object",
        properties: {
          title: textField("Title of the cited paper"),
          authors: textField("Authors of the cited paper"),
          year: textField("Publication year"),
          relevance: textField(
            "Why this reference is important to the current paper",
          ),
        },
        required: ["title", "authors"],
      },
    },
    publication: {
      type: "object",
      properties: {
        journal: textField("Name of the journal or conference"),
        year: textField("Year of publication"),
        doi: textField("Digital Object Identifier (DOI) of the paper"),
        url: textField("URL where the paper can be accessed"),
      },
      required: ["year"],
    },
  },
};
|
||||
@@ -0,0 +1,4 @@
|
||||
// Ambient typing for the "cli-markdown" package: its default export is
// declared as a function taking a string and returning a string.
declare module "cli-markdown" {
|
||||
function cliMarkdown(input: string): string;
|
||||
export default cliMarkdown;
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
import * as readline from "readline/promises";
|
||||
import figlet from "figlet";
|
||||
import pc from "picocolors";
|
||||
|
||||
/**
 * Prints the "Extract Demo" ASCII-art banner in bold bright red, followed by
 * a gray 60-character rule, with a blank-line block before and after.
 */
export async function renderLogo(): Promise<void> {
  const banner = pc.bold(
    pc.redBright(
      figlet.textSync("Extract Demo", {
        font: "ANSI Shadow",
        horizontalLayout: "default",
        verticalLayout: "default",
        width: 100,
        whitespaceBreak: true,
      }),
    ),
  );
  const rule = pc.gray("─".repeat(60));

  // Padding, banner, separator, padding — one console.log call each.
  for (const chunk of ["\n", banner, rule, "\n"]) {
    console.log(chunk);
  }
}
|
||||
|
||||
/**
 * Prompts on stdout with "Path to your file: " and resolves with the line the
 * user typed on stdin.
 *
 * A fresh readline interface is created per call and is always closed, even
 * when `question` rejects (for example when stdin ends), so a failed prompt
 * does not leave the interface attached to stdin.
 */
export async function consoleInput(): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  try {
    return await rl.question("Path to your file: ");
  } finally {
    rl.close();
  }
}
|
||||
@@ -0,0 +1,131 @@
|
||||
# LlamaCloud Index Demo
|
||||
|
||||
A TypeScript demo application showcasing the power of **LlamaCloud Index** - a fully automated document ingestion and retrieval service offered within [LlamaCloud](https://cloud.llamaindex.ai). This demo allows you to ask questions, retrieve relevant contextual information, and generate AI-powered responses using OpenAI's GPT models.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Start the Demo](#start-the-demo)
|
||||
- [Development Mode](#development-mode)
|
||||
- [Build the Project](#build-the-project)
|
||||
- [Code Quality](#code-quality)
|
||||
- [Quick Commands Reference](#quick-commands-reference)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Dependencies](#api-dependencies)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [License](#license)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Features
|
||||
|
||||
- 🤖 **RAG**: Simple-yet-effective Retrieval Augmented Generation pipeline built on top of LlamaCloud Index and OpenAI
|
||||
- 🎨 **Beautiful CLI**: Styled console interface with colors and ASCII art
|
||||
- ⚡ **Fast Development**: Hot reload support with watch mode
|
||||
- 🛠️ **TypeScript**: Full TypeScript support with strict type checking
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js (version 18 or higher)
|
||||
- pnpm package manager
|
||||
- OpenAI API key
|
||||
- LlamaCloud API key
|
||||
- An existing LlamaCloud Index pipeline
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/index/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
pnpm install
|
||||
```
|
||||
|
||||
3. Set up your environment variables:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
export PIPELINE_NAME="your-pipeline-name"
|
||||
```
|
||||
|
||||
4. Or write them into a `.env` file:
|
||||
|
||||
```env
|
||||
OPENAI_API_KEY="your-openai-api-key"
|
||||
LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
PIPELINE_NAME="your-pipeline-name"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Demo
|
||||
|
||||
```bash
|
||||
pnpm run start
|
||||
```
|
||||
|
||||
The application will display a welcome screen and prompt you to start chatting!
|
||||
|
||||
### Development Mode
|
||||
|
||||
For development with hot reload:
|
||||
|
||||
```bash
|
||||
pnpm run dev
|
||||
```
|
||||
|
||||
### Build the Project
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
Format code:
|
||||
|
||||
```bash
|
||||
pnpm run format
|
||||
```
|
||||
|
||||
Lint code:
|
||||
|
||||
```bash
|
||||
pnpm run lint
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Message Input**: Enter a message
|
||||
2. **Retrieval**: Several nodes are retrieved from the LlamaCloud index you specified
|
||||
3. **AI Response Generation**: The retrieved information is passed on to the AI model, along with its relevance score, and a reply to your original message is generated starting from that.
|
||||
4. **Results**: View the AI-generated summary in your terminal
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Module Resolution Errors**: Ensure you're using Node.js 18+ and have all dependencies installed
|
||||
2. **API Key Issues**: Verify your OpenAI and LlamaCloud API keys are correctly set
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `pnpm run format` and `pnpm run lint`
|
||||
5. Submit a pull request
|
||||
@@ -0,0 +1,15 @@
|
||||
import js from "@eslint/js";
|
||||
import globals from "globals";
|
||||
import tseslint from "typescript-eslint";
|
||||
import { defineConfig } from "eslint/config";
|
||||
|
||||
// ESLint flat config: the core recommended rules for JS/TS sources, a
// source-type override for plain .js files, then the typescript-eslint
// recommended preset.
export default defineConfig([
  {
    files: ["**/*.{js,mjs,cjs,ts,mts,cts}"],
    plugins: { js },
    extends: ["js/recommended"],
    // The linted sources are a Node.js CLI (they read process.env and
    // process.stdin), so expose Node globals rather than browser ones.
    languageOptions: { globals: globals.node },
  },
  { files: ["**/*.js"], languageOptions: { sourceType: "script" } },
  tseslint.configs.recommended,
]);
|
||||
@@ -0,0 +1,48 @@
|
||||
{
|
||||
"name": "llama-chat",
|
||||
"version": "0.1.0",
|
||||
"description": "Demo for LlamaCloud Index in TypeScript",
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"There are no tests\"",
|
||||
"start": "pnpm exec tsx src/index.ts",
|
||||
"lint": "eslint ./src/",
|
||||
"format": "prettier --write ./src/",
|
||||
"build": "tsc",
|
||||
"dev": "pnpm exec tsx --watch src/index.ts"
|
||||
},
|
||||
"keywords": [
|
||||
"ai",
|
||||
"rag",
|
||||
"retrieval",
|
||||
"pipeline",
|
||||
"llms",
|
||||
"chatbot"
|
||||
],
|
||||
"author": "LlamaIndex",
|
||||
"license": "MIT",
|
||||
"packageManager": "pnpm@10.12.4",
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.32.0",
|
||||
"@types/figlet": "^1.7.0",
|
||||
"@types/node": "^24.1.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.38.0",
|
||||
"@typescript-eslint/parser": "^8.38.0",
|
||||
"eslint": "^9.32.0",
|
||||
"globals": "^16.3.0",
|
||||
"jiti": "^2.5.1",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.8.3",
|
||||
"typescript-eslint": "^8.38.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.3.23",
|
||||
"ai": "^4.3.19",
|
||||
"consola": "^3.4.2",
|
||||
"dotenv": "^17.2.1",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "link:../../ts/llama_cloud_services",
|
||||
"picocolors": "^1.1.1"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
import { LlamaCloudIndex } from "llama-cloud-services";
|
||||
import { logger } from "./logger";
|
||||
import pc from "picocolors";
|
||||
import {
|
||||
consoleInput,
|
||||
retrievalAugmentedGeneration,
|
||||
renderLogo,
|
||||
} from "./utils";
|
||||
import dotenv from "dotenv";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Interactive entry point of the LlamaCloud Index chat demo.
 *
 * Prints the logo and welcome text, then repeatedly reads a question,
 * retrieves the top 5 nodes from the configured index and prints the
 * generated answer until the user types `quit`.
 *
 * @returns 0 on a normal exit, 1 when PIPELINE_NAME is not set.
 */
export async function main(): Promise<number> {
  const pipelineName = process.env.PIPELINE_NAME;
  if (!pipelineName) {
    // Fail early with an actionable message instead of passing `undefined`
    // as the index name.
    logger.error(
      "PIPELINE_NAME is not set. Export it or add it to your .env file before starting the demo.",
    );
    return 1;
  }

  const index = new LlamaCloudIndex({
    name: pipelineName,
    projectName: "Default",
    apiKey: process.env.LLAMA_CLOUD_API_KEY, // can provide API-key in the constructor or in the env
  });
  const retriever = index.asRetriever({
    similarityTopK: 5,
  });
  await renderLogo();
  logger.log(
    `Welcome to ${pc.bold(
      pc.magentaBright("✨LlamaChat✨"),
    )}, our demo for ${pc.bold(pc.green("Index🦙"))}, a ${pc.bold(
      pc.cyan("LlamaCloud☁️"),
    )} (https://cloud.llamaindex.ai) product!.\nType a question below, and you will get an answer!👇\nIf you wish to exit, just type ${pc.bold(
      pc.gray("quit"),
    )}.\n`,
  );

  while (true) {
    const userInput = (await consoleInput()).trim();
    if (userInput.toLowerCase() === "quit") {
      break;
    }
    if (userInput === "") {
      // Nothing typed: ask again instead of querying with an empty prompt.
      continue;
    }
    try {
      const nodes = await retriever.retrieve(userInput);
      const summary = await retrievalAugmentedGeneration(nodes, userInput);
      logger.log(`${pc.bold(pc.magentaBright("LlamaChat✨:"))}\n${summary}`);
    } catch (error) {
      logger.error(`Error processing your request: ${error}`);
    }
  }
  return 0;
}

// Propagate main()'s status as the process exit code; unexpected errors are
// printed and reported as a failure.
main()
  .then((code) => {
    process.exitCode = code;
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
|
||||
@@ -0,0 +1,8 @@
|
||||
import { createConsola } from "consola";
|
||||
import type { ConsolaInstance } from "consola";
|
||||
|
||||
// Shared consola logger for the demo, created with `formatOptions.date`
// set to false.
export const logger: ConsolaInstance = createConsola({
|
||||
formatOptions: {
|
||||
date: false,
|
||||
},
|
||||
});
|
||||
@@ -0,0 +1,56 @@
|
||||
import { generateText } from "ai";
|
||||
import { openai } from "@ai-sdk/openai";
|
||||
import { NodeWithScore, MetadataMode } from "llamaindex";
|
||||
import * as readline from "readline/promises";
|
||||
import figlet from "figlet";
|
||||
import pc from "picocolors";
|
||||
|
||||
/**
 * Prints the "LlamaChat" ASCII-art banner in bold bright yellow, followed by
 * a gray 60-character rule, with a blank-line block before and after.
 */
export async function renderLogo(): Promise<void> {
  const banner = pc.bold(
    pc.yellowBright(
      figlet.textSync("LlamaChat", {
        font: "ANSI Shadow",
        horizontalLayout: "default",
        verticalLayout: "default",
        width: 100,
        whitespaceBreak: true,
      }),
    ),
  );
  const rule = pc.gray("─".repeat(60));

  // Padding, banner, separator, padding — one console.log call each.
  for (const chunk of ["\n", banner, rule, "\n"]) {
    console.log(chunk);
  }
}
|
||||
|
||||
/**
 * Prompts the user on the terminal and resolves with the line they typed.
 *
 * A fresh readline interface bound to process.stdin/process.stdout is created
 * for every call and is always closed again, even when `question` rejects,
 * so a failed prompt cannot leave an open interface behind.
 *
 * @returns The raw answer exactly as entered (not trimmed).
 */
export async function consoleInput(): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  try {
    return await rl.question(pc.cyanBright("You✨:"));
  } finally {
    // Release the interface on both the success and the error path.
    rl.close();
  }
}
|
||||
|
||||
/**
 * Answers a user question from a set of retrieved nodes.
 *
 * Each node is serialised as one line containing its content (with
 * `MetadataMode.ALL`) and its relevance score. The lines are wrapped in
 * square brackets and sent, together with the user's question, to the
 * "gpt-4.1" model through `generateText`.
 *
 * @param nodes  Retrieved nodes with their optional scores.
 * @param prompt The user's question.
 * @returns The text generated by the model.
 */
export async function retrievalAugmentedGeneration(
  nodes: NodeWithScore[],
  prompt: string,
): Promise<string> {
  // The prompt below tells the model that -1 means "no score available",
  // so a missing score must be emitted as -1 for that instruction to hold.
  const context = nodes
    .map(
      (node) =>
        `\t{information: '${node.node.getContent(
          MetadataMode.ALL,
        )}', relevanceScore: '${node.score ?? -1}'}\n`,
    )
    .join("");

  const { text } = await generateText({
    model: openai("gpt-4.1"),
    prompt: `[\n${context}\n]\n\nBased on the information you are given and on the relevance score of that (where -1 means no score available), answer to this user prompt: '${prompt}'`,
  });

  return text;
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ES2022",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"types": ["node"],
|
||||
"moduleResolution": "bundler",
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
# LlamaParse Demo
|
||||
|
||||
A TypeScript demo application showcasing the power of **LlamaParse** - an intelligent document parsing service from [LlamaCloud](https://cloud.llamaindex.ai). This demo allows you to parse various document formats and generate AI-powered summaries using OpenAI's GPT models.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Start the Demo](#start-the-demo)
|
||||
- [Development Mode](#development-mode)
|
||||
- [Build the Project](#build-the-project)
|
||||
- [Code Quality](#code-quality)
|
||||
- [Quick Commands Reference](#quick-commands-reference)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Dependencies](#api-dependencies)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [License](#license)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Features
|
||||
|
||||
- 📄 **Document Parsing**: Parse PDFs, Word docs, and other formats using LlamaParse
|
||||
- 🤖 **AI Summaries**: Generate intelligent summaries using OpenAI GPT-4
|
||||
- 🎨 **Beautiful CLI**: Styled console interface with colors and ASCII art
|
||||
- ⚡ **Fast Development**: Hot reload support with watch mode
|
||||
- 🛠️ **TypeScript**: Full TypeScript support with strict type checking
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js (version 18 or higher)
|
||||
- pnpm package manager
|
||||
- OpenAI API key
|
||||
- LlamaCloud API key
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/llama_cloud_services
|
||||
cd llama_cloud_services/examples-ts/parse/
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
pnpm install
|
||||
```
|
||||
|
||||
3. Set up your environment variables:
|
||||
|
||||
```bash
|
||||
# Add your API keys to your environment
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
export LLAMA_CLOUD_API_KEY="your-llamacloud-api-key"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start the Demo
|
||||
|
||||
```bash
|
||||
pnpm run start
|
||||
```
|
||||
|
||||
The application will display a welcome screen and prompt you to enter the path to a document you'd like to process.
|
||||
|
||||
### Development Mode
|
||||
|
||||
For development with hot reload:
|
||||
|
||||
```bash
|
||||
pnpm run dev
|
||||
```
|
||||
|
||||
### Build the Project
|
||||
|
||||
```bash
|
||||
pnpm run build
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
Format code:
|
||||
|
||||
```bash
|
||||
pnpm run format
|
||||
```
|
||||
|
||||
Lint code:
|
||||
|
||||
```bash
|
||||
pnpm run lint
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Document Input**: Enter the path to your document when prompted
|
||||
2. **Parsing**: LlamaParse processes the document and extracts structured content
|
||||
3. **AI Summary**: The extracted content is sent to OpenAI GPT-4 for summarization
|
||||
4. **Results**: View the AI-generated summary in your terminal
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Module Resolution Errors**: Ensure you're using Node.js 18+ and have all dependencies installed
|
||||
2. **API Key Issues**: Verify your OpenAI and LlamaCloud API keys are correctly set
|
||||
3. **File Path Errors**: Use absolute paths or ensure relative paths are correct from the project root
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see the [LICENSE](../../LICENSE) file for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Run `pnpm run format` and `pnpm run lint`
|
||||
5. Submit a pull request
|
||||
@@ -0,0 +1,15 @@
|
||||
import js from "@eslint/js";
|
||||
import globals from "globals";
|
||||
import tseslint from "typescript-eslint";
|
||||
import { defineConfig } from "eslint/config";
|
||||
|
||||
// Flat ESLint configuration for the demo: the core JS recommended rules for
// every JS/TS source file, followed by the typescript-eslint recommended set.
export default defineConfig([
  {
    files: ["**/*.{js,mjs,cjs,ts,mts,cts}"],
    plugins: { js },
    extends: ["js/recommended"],
    // The demo is a Node.js CLI (it reads process.stdin/process.stdout), so
    // declare Node globals rather than browser ones.
    languageOptions: { globals: globals.node },
  },
  { files: ["**/*.js"], languageOptions: { sourceType: "script" } },
  tseslint.configs.recommended,
]);
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"name": "llamaparse-demo",
|
||||
"version": "0.1.0",
|
||||
"description": "Demo for LlamaParse in TypeScript",
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"There are no tests\"",
|
||||
"start": "pnpm exec tsx src/index.ts",
|
||||
"lint": "eslint ./src/",
|
||||
"format": "prettier --write ./src/",
|
||||
"build": "tsc",
|
||||
"dev": "pnpm exec tsx --watch src/index.ts"
|
||||
},
|
||||
"keywords": [
|
||||
"ai",
|
||||
"ocr",
|
||||
"parsing",
|
||||
"intelligent-document-processing",
|
||||
"pdf",
|
||||
"llms"
|
||||
],
|
||||
"author": "LlamaIndex",
|
||||
"license": "MIT",
|
||||
"packageManager": "pnpm@10.12.4",
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.32.0",
|
||||
"@types/figlet": "^1.7.0",
|
||||
"@types/node": "^24.1.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.38.0",
|
||||
"@typescript-eslint/parser": "^8.38.0",
|
||||
"eslint": "^9.32.0",
|
||||
"globals": "^16.3.0",
|
||||
"jiti": "^2.5.1",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.8.3",
|
||||
"typescript-eslint": "^8.38.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai": "^1.3.23",
|
||||
"ai": "^4.3.19",
|
||||
"consola": "^3.4.2",
|
||||
"figlet": "^1.8.2",
|
||||
"llama-cloud-services": "link:../../ts/llama_cloud_services",
|
||||
"picocolors": "^1.1.1"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
import { LlamaParseReader } from "llama-cloud-services";
|
||||
import { logger } from "./logger";
|
||||
import pc from "picocolors";
|
||||
import { consoleInput, generateSummary, renderLogo } from "./utils";
|
||||
|
||||
/**
 * Entry point of the LlamaParse demo CLI.
 *
 * Prints the banner and a welcome message, then repeatedly asks for a file
 * path, parses the file with `LlamaParseReader` (markdown result type) and
 * logs an AI-generated summary. Failures while processing a file are logged
 * and the loop continues. Typing "quit" (any letter case, surrounding
 * whitespace ignored) ends the loop.
 *
 * @returns 0 once the user quits.
 */
export async function main(): Promise<number> {
  const reader = new LlamaParseReader({ resultType: "markdown" });
  await renderLogo();
  logger.log(
    `Welcome to ${pc.bold(
      pc.magentaBright("✨LlamaParse Demo✨"),
    )}, our demo for ${pc.bold(pc.green("LlamaParse🦙"))}, a ${pc.bold(
      pc.cyan("LlamaCloud☁️"),
    )} (https://cloud.llamaindex.ai) product!.\nType the path to the document you would like to process below👇\nIf you wish to exit, just type ${pc.bold(
      pc.gray("quit"),
    )}.\n`,
  );

  while (true) {
    // Trim so that "quit " or a path pasted with stray whitespace
    // (common with terminal drag-and-drop) is handled as intended.
    const userInput = (await consoleInput()).trim();

    if (userInput.toLowerCase() === "quit") {
      break;
    }
    if (userInput === "") {
      // Nothing entered: ask again instead of sending an empty path to the parser.
      continue;
    }

    try {
      const documents = await reader.loadData(userInput);
      const summary = await generateSummary(documents);
      logger.log(`${pc.bold(pc.cyan("AI-generated summary✨"))}:\n${summary}`);
    } catch (error) {
      logger.error(`Error processing file: ${error}`);
    }
  }

  return 0;
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,8 @@
|
||||
import { createConsola } from "consola";
|
||||
import type { ConsolaInstance } from "consola";
|
||||
|
||||
// Shared consola logger for the demo, created with the `date` format option switched off.
const loggerOptions = { formatOptions: { date: false } };

export const logger: ConsolaInstance = createConsola(loggerOptions);
|
||||
@@ -0,0 +1,51 @@
|
||||
import { generateText } from "ai";
|
||||
import { openai } from "@ai-sdk/openai";
|
||||
import { Document } from "llamaindex";
|
||||
import * as readline from "readline/promises";
|
||||
import figlet from "figlet";
|
||||
import pc from "picocolors";
|
||||
|
||||
/**
 * Prints the "LlamaParse Demo" ASCII-art banner to stdout.
 *
 * The banner is rendered with figlet's "ANSI Shadow" font, coloured bold
 * bright-magenta, and followed by a 60-character gray rule. A "\n" is logged
 * before the banner and after the rule as vertical padding.
 */
export async function renderLogo(): Promise<void> {
  const banner = pc.bold(
    pc.magentaBright(
      figlet.textSync("LlamaParse Demo", {
        font: "ANSI Shadow",
        horizontalLayout: "default",
        verticalLayout: "default",
        width: 100,
        whitespaceBreak: true,
      }),
    ),
  );
  const divider = pc.gray("─".repeat(60));

  // Padding, banner, rule, padding: one console.log call per entry.
  for (const line of ["\n", banner, divider, "\n"]) {
    console.log(line);
  }
}
|
||||
|
||||
/**
 * Asks the user for a file path on the terminal and resolves with the line
 * they typed.
 *
 * A fresh readline interface bound to process.stdin/process.stdout is created
 * for every call and is always closed again, even when `question` rejects,
 * so a failed prompt cannot leave an open interface behind.
 *
 * @returns The raw answer exactly as entered (not trimmed).
 */
export async function consoleInput(): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  try {
    return await rl.question("Path to your file: ");
  } finally {
    // Release the interface on both the success and the error path.
    rl.close();
  }
}
|
||||
|
||||
/**
 * Produces a summary of the given parsed documents.
 *
 * The text of every document is concatenated, each followed by a
 * "\n\n---\n\n" separator, wrapped in a <chat> element together with the
 * summarisation instruction, and sent to the "gpt-4.1" model through
 * `generateText`.
 *
 * @param documents Parsed documents whose `text` should be summarised.
 * @returns The summary text generated by the model.
 */
export async function generateSummary(documents: Document[]): Promise<string> {
  const mainText = documents
    .map((document) => `${document.text}\n\n---\n\n`)
    .join("");

  const { text } = await generateText({
    model: openai("gpt-4.1"),
    // The wrapper must start with an opening <chat> tag; it previously began
    // with a closing </chat>, which left the prompt markup malformed.
    prompt: `<chat>\n\t<text>${mainText}</text>\n\t<instructions>Could you please generate a summary of the given text?</instructions>\n</chat>`,
  });

  return text;
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ES2022",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"types": ["node"],
|
||||
"moduleResolution": "bundler",
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
# LlamaCloud Services Examples - Python
|
||||
> **⚠️ DEPRECATION NOTICE**
|
||||
>
|
||||
> This repository and its packages are deprecated and will be maintained until **May 1, 2026**.
|
||||
>
|
||||
> **Please migrate to the new packages:**
|
||||
> - **Python**: `pip install llama-cloud>=1.0` ([GitHub](https://github.com/run-llama/llama-cloud-py))
|
||||
> - **TypeScript**: `npm install @llamaindex/llama-cloud` ([GitHub](https://github.com/run-llama/llama-cloud-ts))
|
||||
>
|
||||
> The new packages provide the same functionality with improved performance, better support, and active development.
|
||||
|
||||
|
||||
In this folder you will find several Python notebooks that contain examples regarding:
|
||||
|
||||
- [LlamaParse](./parse/)
|
||||
- [LlamaExtract](./extract/)
|
||||
- [LlamaCloudIndex](./index/)
|
||||
|
||||
Follow the instructions in each notebook to get started!
|
||||
@@ -0,0 +1 @@
|
||||
sample_files/
|
||||
@@ -0,0 +1,815 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Batch Parse with LlamaCloud Directories\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use LlamaCloud's batch processing API to parse multiple files in a directory. The workflow includes:\n",
|
||||
"\n",
|
||||
"1. **Creating a Directory** - Set up a directory to organize your files\n",
|
||||
"2. **Uploading Files** - Upload multiple files to the directory\n",
|
||||
"3. **Starting a Batch Parse Job** - Kick off batch processing on all files\n",
|
||||
"4. **Monitoring Progress** - Check the status and view results\n",
|
||||
"\n",
|
||||
"This is useful when you need to parse many documents at once, as the batch API handles the orchestration and provides progress tracking."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c2b5e1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **⚠️ DEPRECATION NOTICE**\n>\n> This example uses the deprecated `llama-cloud-services` package, which will be maintained until **May 1, 2026**.\n>\n> **Please migrate to:**\n> - **Python**: `pip install llama-cloud>=1.0` ([GitHub](https://github.com/run-llama/llama-cloud-py))\n> - **New Package Documentation**: https://docs.cloud.llamaindex.ai/\n>\n> The new package provides the same functionality with improved performance and support."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup and Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-cloud python-dotenv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import httpx\n",
|
||||
"\n",
|
||||
"# Load environment variables\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"# Set your API key\n",
|
||||
"LLAMA_CLOUD_API_KEY = os.environ.get(\"LLAMA_CLOUD_API_KEY\", \"llx-...\")\n",
|
||||
"\n",
|
||||
"# Optional: Set base URL (defaults to https://api.cloud.llamaindex.ai if not set)\n",
|
||||
"LLAMA_CLOUD_BASE_URL = os.environ.get(\n",
|
||||
" \"LLAMA_CLOUD_BASE_URL\", \"https://api.cloud.llamaindex.ai\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Optional: Set project_id if you have one, otherwise it will use your default project\n",
|
||||
"PROJECT_ID = os.environ.get(\"LLAMA_CLOUD_PROJECT_ID\", None)\n",
|
||||
"\n",
|
||||
"print(\"✅ API key configured\")\n",
|
||||
"print(f\" Base URL: {LLAMA_CLOUD_BASE_URL}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup HTTP Client\n",
|
||||
"\n",
|
||||
"Since the current version of the llama-cloud SDK has some issues with the beta endpoints, we'll use direct HTTP requests with httpx for reliability."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create HTTP client with authentication\n",
|
||||
"headers = {\n",
|
||||
" \"Authorization\": f\"Bearer {LLAMA_CLOUD_API_KEY}\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"✅ HTTP client configured\")\n",
|
||||
"print(f\" Using base URL: {LLAMA_CLOUD_BASE_URL}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 1: Create a Directory\n",
|
||||
"\n",
|
||||
"First, we'll create a directory to organize our files. Directories help you group related files together for batch processing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"# Create a directory with a timestamp in the name\n",
|
||||
"timestamp = datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
|
||||
"directory_name = f\"batch-parse-demo-{timestamp}\"\n",
|
||||
"\n",
|
||||
"# Create directory using HTTP request\n",
|
||||
"response = httpx.post(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/directories\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": PROJECT_ID},\n",
|
||||
" json={\n",
|
||||
" \"name\": directory_name,\n",
|
||||
" \"description\": \"Demo directory for batch parse example\",\n",
|
||||
" },\n",
|
||||
" timeout=60.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if response.status_code in [200, 201]:\n",
|
||||
" directory = response.json()\n",
|
||||
" directory_id = directory[\"id\"]\n",
|
||||
" project_id = directory[\"project_id\"]\n",
|
||||
"\n",
|
||||
" print(f\"✅ Created directory: {directory['name']}\")\n",
|
||||
" print(f\" Directory ID: {directory_id}\")\n",
|
||||
" print(f\" Project ID: {project_id}\")\n",
|
||||
"else:\n",
|
||||
" raise Exception(\n",
|
||||
" f\"Failed to create directory: {response.status_code} - {response.text}\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 2: Upload Files to the Directory\n",
|
||||
"\n",
|
||||
"Now we'll upload some files to our directory. For this demo, we'll download some sample PDFs and upload them.\n",
|
||||
"\n",
|
||||
"You can replace these with your own files."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a directory for sample files\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"os.makedirs(\"sample_files\", exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Sample documents to download\n",
|
||||
"sample_docs = {\n",
|
||||
" \"attention.pdf\": \"https://arxiv.org/pdf/1706.03762.pdf\",\n",
|
||||
" \"bert.pdf\": \"https://arxiv.org/pdf/1810.04805.pdf\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Download sample documents\n",
|
||||
"for filename, url in sample_docs.items():\n",
|
||||
" filepath = f\"sample_files/{filename}\"\n",
|
||||
" if not os.path.exists(filepath):\n",
|
||||
" print(f\"📥 Downloading {filename}...\")\n",
|
||||
" response = requests.get(url)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" with open(filepath, \"wb\") as f:\n",
|
||||
" f.write(response.content)\n",
|
||||
" print(f\" ✅ Downloaded {filename}\")\n",
|
||||
" else:\n",
|
||||
" print(f\" ❌ Failed to download {filename}\")\n",
|
||||
" else:\n",
|
||||
" print(f\"📁 {filename} already exists\")\n",
|
||||
"\n",
|
||||
"print(\"\\n✅ Sample files ready!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-10",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Upload Files to Directory\n",
|
||||
"\n",
|
||||
"Now let's upload the files to our directory using the `upload_file_to_directory` endpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-11",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"uploaded_files = []\n",
|
||||
"\n",
|
||||
"# Workaround: Use direct HTTP requests instead of SDK due to SDK bug\n",
|
||||
"import httpx\n",
|
||||
"\n",
|
||||
"for filename in os.listdir(\"sample_files\"):\n",
|
||||
" if filename.endswith(\".pdf\"):\n",
|
||||
" filepath = f\"sample_files/{filename}\"\n",
|
||||
"\n",
|
||||
" print(f\"📤 Uploading {filename}...\")\n",
|
||||
"\n",
|
||||
" # Upload file using direct HTTP request (SDK has a bug with file uploads)\n",
|
||||
" with open(filepath, \"rb\") as f:\n",
|
||||
" # Prepare the multipart form data correctly\n",
|
||||
" files = {\"upload_file\": (filename, f, \"application/pdf\")}\n",
|
||||
"\n",
|
||||
" # Make the request directly\n",
|
||||
" response = httpx.post(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/directories/{directory_id}/files/upload\",\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" files=files,\n",
|
||||
" headers={\"Authorization\": f\"Bearer {LLAMA_CLOUD_API_KEY}\"},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code in [200, 201]:\n",
|
||||
" directory_file = response.json()\n",
|
||||
" uploaded_files.append(directory_file)\n",
|
||||
" print(f\" ✅ Uploaded: {directory_file.get('display_name')}\")\n",
|
||||
" print(f\" File ID: {directory_file.get('id')}\")\n",
|
||||
" else:\n",
|
||||
" print(f\" ❌ Upload failed: {response.status_code}\")\n",
|
||||
" print(f\" Error: {response.text[:200]}\")\n",
|
||||
"\n",
|
||||
"print(f\"\\n✅ Uploaded {len(uploaded_files)} files to directory\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 3: Create a Batch Parse Job\n",
|
||||
"\n",
|
||||
"Now that we have files in our directory, let's create a batch parse job to process them all at once.\n",
|
||||
"\n",
|
||||
"The batch processing API uses the same configuration as LlamaParse."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Configure the parse job\n",
|
||||
"# This configuration will apply to all files in the directory\n",
|
||||
"job_config = {\n",
|
||||
" \"job_name\": \"parse_raw_file_job\", # Must match the JobNames enum value\n",
|
||||
" \"partitions\": {},\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"parse\",\n",
|
||||
" \"lang\": \"en\",\n",
|
||||
" \"fast_mode\": True,\n",
|
||||
" },\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"✅ Job configuration created\")\n",
|
||||
"print(f\" Language: {job_config['parameters']['lang']}\")\n",
|
||||
"print(f\" Fast mode: {job_config['parameters']['fast_mode']}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-14",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit the Batch Job\n",
|
||||
"\n",
|
||||
"Now let's submit the batch job to process all files in the directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f\"🚀 Submitting batch parse job for directory: {directory_id}\")\n",
|
||||
"print(f\" Processing {len(uploaded_files)} files...\\n\")\n",
|
||||
"\n",
|
||||
"# Submit batch job using HTTP request\n",
|
||||
"response = httpx.post(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" json={\n",
|
||||
" \"directory_id\": directory_id,\n",
|
||||
" \"job_config\": job_config,\n",
|
||||
" \"page_size\": 100, # Number of files to fetch per batch\n",
|
||||
" \"continue_as_new_threshold\": 10, # Workflow continuation threshold\n",
|
||||
" },\n",
|
||||
" timeout=60.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if response.status_code in [200, 201]:\n",
|
||||
" batch_job = response.json()\n",
|
||||
" batch_job_id = batch_job[\"id\"]\n",
|
||||
"\n",
|
||||
" print(\"✅ Batch job submitted successfully!\")\n",
|
||||
" print(f\" Batch Job ID: {batch_job_id}\")\n",
|
||||
" print(f\" Workflow ID: {batch_job.get('workflow_id')}\")\n",
|
||||
" print(f\" Status: {batch_job.get('status')}\")\n",
|
||||
" print(f\" Total Items: {batch_job.get('total_items')}\")\n",
|
||||
"else:\n",
|
||||
" raise Exception(\n",
|
||||
" f\"Failed to create batch job: {response.status_code} - {response.text}\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-16",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 4: Monitor Job Progress\n",
|
||||
"\n",
|
||||
"Now let's monitor the batch job progress. We'll poll the status endpoint to see how the job is progressing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def print_job_status(status_data):\n",
|
||||
" \"\"\"Helper function to print job status in a readable format.\"\"\"\n",
|
||||
" job = status_data[\"job\"]\n",
|
||||
" progress_pct = status_data[\"progress_percentage\"]\n",
|
||||
"\n",
|
||||
" print(f\"\\n{'='*60}\")\n",
|
||||
" print(f\"Job Status: {job['status']}\")\n",
|
||||
" print(f\"{'='*60}\")\n",
|
||||
" print(f\"Total Items: {job['total_items']}\")\n",
|
||||
" print(f\"Completed: {job['processed_items']}\")\n",
|
||||
" print(f\"Failed: {job['failed_items']}\")\n",
|
||||
" print(f\"Skipped: {job['skipped_items']}\")\n",
|
||||
" print(f\"Progress: {progress_pct:.1f}%\")\n",
|
||||
"\n",
|
||||
" if job.get(\"completed_at\"):\n",
|
||||
" print(f\"Completed At: {job['completed_at']}\")\n",
|
||||
" elif job.get(\"started_at\"):\n",
|
||||
" print(f\"Started At: {job['started_at']}\")\n",
|
||||
"\n",
|
||||
" print(f\"{'='*60}\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Poll for status updates\n",
|
||||
"print(\"🔄 Monitoring batch job progress...\")\n",
|
||||
"print(\n",
|
||||
" \"Note: It may take a few seconds for the workflow to initialize and count files.\\n\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"max_polls = 60 # Maximum number of status checks (increased for longer jobs)\n",
|
||||
"poll_interval = 10 # Seconds between checks\n",
|
||||
"\n",
|
||||
"for i in range(max_polls):\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/{batch_job_id}\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" status_data = response.json()\n",
|
||||
" print_job_status(status_data)\n",
|
||||
"\n",
|
||||
" # Check if job is complete\n",
|
||||
" job_status = status_data[\"job\"][\"status\"]\n",
|
||||
" if job_status in [\"completed\", \"failed\", \"cancelled\"]:\n",
|
||||
" print(f\"\\n✅ Job finished with status: {job_status}\")\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" if i < max_polls - 1:\n",
|
||||
" print(f\"\\n⏳ Waiting {poll_interval} seconds before next check...\")\n",
|
||||
" time.sleep(poll_interval)\n",
|
||||
" else:\n",
|
||||
" print(f\"Error getting status: {response.status_code} - {response.text}\")\n",
|
||||
" break\n",
|
||||
"else:\n",
|
||||
" print(f\"\\n⚠️ Reached maximum polling attempts. Job may still be running.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 5: View Job Items\n",
|
||||
"\n",
|
||||
"Let's look at the individual items in the batch job to see which files were processed successfully."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all items in the batch job\n",
|
||||
"response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/{batch_job_id}/items\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id, \"limit\": 100},\n",
|
||||
" timeout=60.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if response.status_code == 200:\n",
|
||||
" items_response = response.json()\n",
|
||||
"\n",
|
||||
" print(f\"\\n📋 Batch Job Items ({items_response['total_size']} total)\")\n",
|
||||
" print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
" for item in items_response[\"items\"]:\n",
|
||||
" status_emoji = (\n",
|
||||
" \"✅\"\n",
|
||||
" if item[\"status\"] == \"completed\"\n",
|
||||
" else \"❌\"\n",
|
||||
" if item[\"status\"] == \"failed\"\n",
|
||||
" else \"⏳\"\n",
|
||||
" )\n",
|
||||
" print(f\"{status_emoji} {item['item_name']}\")\n",
|
||||
" print(f\" Status: {item['status']}\")\n",
|
||||
" print(f\" Item ID: {item['item_id']}\")\n",
|
||||
"\n",
|
||||
" if item.get(\"error_message\"):\n",
|
||||
" print(f\" Error: {item['error_message']}\")\n",
|
||||
"\n",
|
||||
" print()\n",
|
||||
"else:\n",
|
||||
" print(f\"Error listing items: {response.status_code} - {response.text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-20",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 6: Retrieve Processing Results\n",
|
||||
"\n",
|
||||
"For each completed file, we can retrieve the processing results to see where the parsed output is stored."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get processing results for a specific item\n",
|
||||
"if items_response[\"items\"]:\n",
|
||||
" first_item = items_response[\"items\"][0]\n",
|
||||
"\n",
|
||||
" print(f\"\\n🔍 Processing results for: {first_item['item_name']}\")\n",
|
||||
" print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing/items/{first_item['item_id']}/processing-results\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" results = response.json()\n",
|
||||
"\n",
|
||||
" print(f\"Item: {results['item_name']}\")\n",
|
||||
" print(f\"Total processing runs: {len(results['processing_results'])}\\n\")\n",
|
||||
"\n",
|
||||
" for i, result in enumerate(results[\"processing_results\"], 1):\n",
|
||||
" print(f\"Run {i}:\")\n",
|
||||
" print(f\" Job Type: {result['job_type']}\")\n",
|
||||
" print(f\" Processed At: {result['processed_at']}\")\n",
|
||||
" print(f\" Parameters Hash: {result['parameters_hash']}\")\n",
|
||||
"\n",
|
||||
" if result.get(\"output_s3_path\"):\n",
|
||||
" print(f\" Output S3 Path: {result['output_s3_path']}\")\n",
|
||||
"\n",
|
||||
" if result.get(\"output_metadata\"):\n",
|
||||
" print(f\" Output Metadata: {result['output_metadata']}\")\n",
|
||||
"\n",
|
||||
" print()\n",
|
||||
" else:\n",
|
||||
" print(f\"Error getting results: {response.status_code} - {response.text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cell-22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Optional: List All Batch Jobs\n",
|
||||
"\n",
|
||||
"You can also list all batch jobs in your project to see the history of batch processing operations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cell-23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# List all parse jobs in the project\n",
|
||||
"response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/beta/batch-processing\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id, \"job_type\": \"parse\", \"limit\": 10},\n",
|
||||
" timeout=60.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if response.status_code == 200:\n",
|
||||
" jobs_response = response.json()\n",
|
||||
"\n",
|
||||
" print(f\"\\n📊 Recent Batch Parse Jobs ({jobs_response['total_size']} total)\")\n",
|
||||
" print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
" for job in jobs_response[\"items\"]:\n",
|
||||
" status_emoji = (\n",
|
||||
" \"✅\"\n",
|
||||
" if job[\"status\"] == \"completed\"\n",
|
||||
" else \"❌\"\n",
|
||||
" if job[\"status\"] == \"failed\"\n",
|
||||
" else \"⏳\"\n",
|
||||
" )\n",
|
||||
" print(f\"{status_emoji} Job ID: {job['id']}\")\n",
|
||||
" print(f\" Status: {job['status']}\")\n",
|
||||
" print(f\" Directory: {job['directory_id']}\")\n",
|
||||
" print(f\" Total Items: {job['total_items']}\")\n",
|
||||
" print(f\" Completed: {job['processed_items']}\")\n",
|
||||
" print(f\" Created: {job['created_at']}\")\n",
|
||||
" print()\n",
|
||||
"else:\n",
|
||||
" print(f\"Error listing jobs: {response.status_code} - {response.text}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "uug7591rkq",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 7: Retrieve Parsed Text Results\n",
|
||||
"\n",
|
||||
"Once the batch job is complete, each BatchJobItem will have a `job_id` field that maps to a parse job ID. We can use this ID with the standard parsing job result endpoints (`/api/v1/parsing/job/{job_id}/result/...`) to fetch the actual parsed text results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "vpp0vxtc0y",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all completed items and their job IDs\n",
|
||||
"completed_items = [\n",
|
||||
" item for item in items_response[\"items\"] if item[\"status\"] == \"completed\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print(f\"📄 Found {len(completed_items)} completed items\\n\")\n",
|
||||
"print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
"# Display the job_id for each completed item\n",
|
||||
"for item in completed_items:\n",
|
||||
" print(f\"📝 {item['item_name']}\")\n",
|
||||
" print(f\" Item ID: {item['item_id']}\")\n",
|
||||
" print(f\" Parse Job ID: {item['job_id']}\")\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4gck6hwpnl6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Fetch Parsed Text for a Specific Document\n",
|
||||
"\n",
|
||||
"Now let's use the `job_id` to retrieve the actual parsed text content from the parsing job's `result/text` endpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "g191kvgxxvk",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the parsed text for the first completed item\n",
|
||||
"if completed_items:\n",
|
||||
" first_completed = completed_items[0]\n",
|
||||
"\n",
|
||||
" print(f\"📖 Retrieving parsed text for: {first_completed['item_name']}\")\n",
|
||||
" print(f\" Using Parse Job ID: {first_completed['job_id']}\\n\")\n",
|
||||
" print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
" # Use the job_id to fetch the parse result\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/text\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" parse_result = response.text\n",
|
||||
"\n",
|
||||
" print(f\"✅ Retrieved parsed text ({len(parse_result)} characters)\\n\")\n",
|
||||
"\n",
|
||||
" # Display first 1000 characters as a preview\n",
|
||||
" print(\"Preview (first 1000 characters):\")\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(parse_result[:1000])\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
"\n",
|
||||
" if len(parse_result) > 1000:\n",
|
||||
" print(f\"\\n... and {len(parse_result) - 1000} more characters\")\n",
|
||||
" else:\n",
|
||||
" print(\n",
|
||||
" f\"Error retrieving parse result: {response.status_code} - {response.text}\"\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" print(\"⚠️ No completed items found to retrieve results from\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2olccb4l8fj",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve Parsed Results in Other Formats\n",
|
||||
"\n",
|
||||
"You can also retrieve the parsed results in JSON or Markdown format by requesting the `result/json` or `result/markdown` endpoint instead of `result/text`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "lcqsfxiw0sr",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if completed_items:\n",
|
||||
" first_completed = completed_items[0]\n",
|
||||
"\n",
|
||||
" print(\n",
|
||||
" f\"📋 Retrieving parse results in different formats for: {first_completed['item_name']}\\n\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Get as JSON (includes structured data with pages, images, etc.)\n",
|
||||
" print(\"1️⃣ Retrieving as JSON...\")\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/json\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" json_result = response.json()\n",
|
||||
" print(f\" ✅ JSON result with {len(json_result['pages'])} pages\")\n",
|
||||
" print(f\" Keys: {list(json_result.keys())}\\n\")\n",
|
||||
" else:\n",
|
||||
" print(f\" Error: {response.status_code}\\n\")\n",
|
||||
"\n",
|
||||
" # Get as Markdown\n",
|
||||
" print(\"2️⃣ Retrieving as Markdown...\")\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{first_completed['job_id']}/result/markdown\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" markdown_result = response.text\n",
|
||||
" print(f\" ✅ Markdown result ({len(markdown_result)} characters)\\n\")\n",
|
||||
"\n",
|
||||
" # Display markdown preview\n",
|
||||
" print(\"Markdown Preview (first 500 characters):\")\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(markdown_result[:500])\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
"\n",
|
||||
" if len(markdown_result) > 500:\n",
|
||||
" print(f\"\\n... and {len(markdown_result) - 500} more characters\")\n",
|
||||
" else:\n",
|
||||
" print(f\" Error: {response.status_code}\")\n",
|
||||
"else:\n",
|
||||
" print(\"⚠️ No completed items found to retrieve results from\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "lr61wqkfq3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch Process All Parsed Results\n",
|
||||
"\n",
|
||||
"You can also loop through all completed items to retrieve and process all the parsed results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "kltydf9xzkl",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Process all completed items\n",
|
||||
"print(f\"🔄 Processing all {len(completed_items)} completed items...\\n\")\n",
|
||||
"print(f\"{'='*80}\\n\")\n",
|
||||
"\n",
|
||||
"all_results = {}\n",
|
||||
"\n",
|
||||
"for item in completed_items:\n",
|
||||
" print(f\"📄 Processing: {item['item_name']}\")\n",
|
||||
" print(f\" Parse Job ID: {item['job_id']}\")\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Retrieve the parsed text for this item\n",
|
||||
" response = httpx.get(\n",
|
||||
" f\"{LLAMA_CLOUD_BASE_URL}/api/v1/parsing/job/{item['job_id']}/result/text\",\n",
|
||||
" headers=headers,\n",
|
||||
" params={\"project_id\": project_id},\n",
|
||||
" timeout=60.0,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" parsed_text = response.text\n",
|
||||
"\n",
|
||||
" all_results[item[\"item_name\"]] = {\n",
|
||||
" \"job_id\": item[\"job_id\"],\n",
|
||||
" \"text\": parsed_text,\n",
|
||||
" \"length\": len(parsed_text),\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" print(f\" ✅ Retrieved {len(parsed_text)} characters\")\n",
|
||||
" else:\n",
|
||||
" all_results[item[\"item_name\"]] = {\n",
|
||||
" \"job_id\": item[\"job_id\"],\n",
|
||||
" \"error\": f\"HTTP {response.status_code}\",\n",
|
||||
" }\n",
|
||||
" print(f\" ❌ Error: HTTP {response.status_code}\")\n",
|
||||
"\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\" ❌ Error: {str(e)}\")\n",
|
||||
" all_results[item[\"item_name\"]] = {\"job_id\": item[\"job_id\"], \"error\": str(e)}\n",
|
||||
"\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"print(f\"{'='*80}\")\n",
|
||||
"print(f\"\\n✅ Processed {len(all_results)} items\")\n",
|
||||
"print(f\"\\nSummary:\")\n",
|
||||
"for name, result in all_results.items():\n",
|
||||
" if \"error\" in result:\n",
|
||||
" print(f\" ❌ {name}: Error - {result['error']}\")\n",
|
||||
" else:\n",
|
||||
" print(f\" ✅ {name}: {result['length']:,} characters\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,138 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using the Raw API\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use the raw API and how to upload a file, poll for the parse job to finish, and download the result."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-02-02 11:11:39-- https://arxiv.org/pdf/1706.03762.pdf\n",
|
||||
"Resolving arxiv.org (arxiv.org)... 151.101.131.42, 151.101.3.42, 151.101.67.42, ...\n",
|
||||
"Connecting to arxiv.org (arxiv.org)|151.101.131.42|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 2215244 (2.1M) [application/pdf]\n",
|
||||
"Saving to: ‘./attention.pdf’\n",
|
||||
"\n",
|
||||
"./attention.pdf 100%[===================>] 2.11M --.-KB/s in 0.08s \n",
|
||||
"\n",
|
||||
"2024-02-02 11:11:39 (27.3 MB/s) - ‘./attention.pdf’ saved [2215244/2215244]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!wget \"https://arxiv.org/pdf/1706.03762.pdf\" -O \"./attention.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"api_key = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import mimetypes\n",
|
||||
"import requests\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"headers = {\"Authorization\": f\"Bearer {api_key}\"}\n",
|
||||
"file_path = \"./attention.pdf\"\n",
|
||||
"base_url = \"https://api.cloud.llamaindex.ai/api/parsing\"\n",
|
||||
"\n",
|
||||
"with open(file_path, \"rb\") as f:\n",
|
||||
" mime_type = mimetypes.guess_type(file_path)[0]\n",
|
||||
" files = {\"file\": (f.name, f, mime_type)}\n",
|
||||
"\n",
|
||||
" # send the request, upload the file\n",
|
||||
" url = f\"{base_url}/upload\"\n",
|
||||
" response = requests.post(url, headers=headers, files=files)\n",
|
||||
"\n",
|
||||
"response.raise_for_status()\n",
|
||||
"# get the job id for the result_url\n",
|
||||
"job_id = response.json()[\"id\"]\n",
|
||||
"result_type = \"text\" # or \"markdown\"\n",
|
||||
"result_url = f\"{base_url}/job/{job_id}/result/{result_type}\"\n",
|
||||
"\n",
|
||||
"# check for the result until it's ready\n",
|
||||
"while True:\n",
|
||||
" response = requests.get(result_url, headers=headers)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" time.sleep(2)\n",
|
||||
"\n",
|
||||
"# download the result\n",
|
||||
"result = response.json()\n",
|
||||
"output = result[result_type]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Provided proper attribution is provided, Google hereby grants permission to\n",
|
||||
" reproduce the tables and figures in this paper solely for use in journalistic or\n",
|
||||
" scholarly works.\n",
|
||||
" Attention Is All You Need\n",
|
||||
"arXiv:1706.03762v7 [cs.CL] 2 Aug 2023\n",
|
||||
" Ashish Vaswani∗ Noam Shazeer∗ Niki Parmar∗ Jakob Uszkoreit∗\n",
|
||||
" Google Brain Google Brain Google Research Google Research\n",
|
||||
" avaswani@google.com noam@google.com nikip@google.com usz@google.com\n",
|
||||
" Llion Jones∗ Aidan N. Gomez∗ † Łukasz Kaiser∗\n",
|
||||
" Google Research University of Toronto \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(output[:1000])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-aNC435Vv-py3.11",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,191 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Llama Parser Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-index llama-parser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-02-02 11:10:10-- https://arxiv.org/pdf/1706.03762.pdf\n",
|
||||
"Resolving arxiv.org (arxiv.org)... 151.101.131.42, 151.101.3.42, 151.101.67.42, ...\n",
|
||||
"Connecting to arxiv.org (arxiv.org)|151.101.131.42|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 2215244 (2.1M) [application/pdf]\n",
|
||||
"Saving to: ‘./attention.pdf’\n",
|
||||
"\n",
|
||||
"./attention.pdf 100%[===================>] 2.11M --.-KB/s in 0.08s \n",
|
||||
"\n",
|
||||
"2024-02-02 11:10:10 (25.9 MB/s) - ‘./attention.pdf’ saved [2215244/2215244]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!wget \"https://arxiv.org/pdf/1706.03762.pdf\" -O \"./attention.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# llama-parser is async-first; running the sync code in a notebook requires the use of nest_asyncio\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id dd0b8e31-0c09-4497-b78a-cc1c92f1d6cf\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_parser import LlamaParser\n",
|
||||
"\n",
|
||||
"documents = LlamaParser(result_type=\"text\").load_data(\"./attention.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ad\n",
|
||||
"relying entirely on an attention mechanism to draw global dependencies between input and output.\n",
|
||||
"The Transformer allows for significantly more parallelization and can reach a new state of the art in\n",
|
||||
"translation quality after being trained for as little as twelve hours on eight P100 GPUs.\n",
|
||||
"2 Background\n",
|
||||
"The goal of reducing sequential computation also forms the foundation of the Extended Neural GPU\n",
|
||||
"[16], ByteNet [18] and ConvS2S [9], all of which use convolutional neural networks as basic building\n",
|
||||
"block, computing hidden representations in parallel for all input and output positions. In these models,\n",
|
||||
"the number of operations required to relate signals from two arbitrary input or output positions grows\n",
|
||||
"in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes\n",
|
||||
"it more difficult to learn dependencies between distant positions [12]. In the Transformer this is\n",
|
||||
"reduced to a constant number of operations, albeit at the cost of reduced effective res\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(documents[0].text[6000:7000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id d4531453-1bbb-48c4-8324-ae9fea9f2fa2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_parser import LlamaParser\n",
|
||||
"\n",
|
||||
"documents = LlamaParser(result_type=\"markdown\").load_data(\"./attention.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ction describes the training regime for our models.\n",
|
||||
"\n",
|
||||
"##### Training Data and Batching\n",
|
||||
"\n",
|
||||
"We trained on the standard WMT 2014 English-German dataset consisting of about 4.5 million\n",
|
||||
"sentence pairs. Sentences were encoded using byte-pair encoding [3], which has a shared source-\n",
|
||||
"target vocabulary of about 37000 tokens. For English-French, we used the significantly larger WMT\n",
|
||||
"2014 English-French dataset consisting of 36M sentences and split tokens into a 32000 word-piece\n",
|
||||
"vocabulary [38]. Sentence pairs were batched together by approximate sequence length. Each training\n",
|
||||
"batch contained a set of sentence pairs containing approximately 25000 source tokens and 25000\n",
|
||||
"target tokens.\n",
|
||||
"\n",
|
||||
"##### Hardware and Schedule\n",
|
||||
"\n",
|
||||
"We trained our models on one machine with 8 NVIDIA P100 GPUs. For our base models using\n",
|
||||
"the hyperparameters described throughout the paper, each training step took about 0.4 seconds. We\n",
|
||||
"trained the base models for a total of 100,000 steps or 12 hours. For our big models,(described on the\n",
|
||||
"bo...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(documents[0].text[20000:21000] + \"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-aNC435Vv-py3.11",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
After Width: | Height: | Size: 3.3 MiB |
@@ -0,0 +1,10 @@
|
||||
# Financial Modeling Assumptions
|
||||
Discount Rate: 8%
|
||||
Terminal Growth Rate: 2%
|
||||
Tax Rate: 25%
|
||||
Revenue Growth (Years 1-5): 10% per annum
|
||||
Revenue Growth (Years 6-10): 5% per annum
|
||||
Capital Expenditures as % of Revenue: 7%
|
||||
Working Capital Assumption: 3% of Revenue
|
||||
Depreciation Rate: 10% per annum
|
||||
Cost of Capital Assumption: 8%
|
||||
|
After Width: | Height: | Size: 67 KiB |
@@ -0,0 +1 @@
|
||||
sec_form_4_dump.json
|
||||
|
After Width: | Height: | Size: 202 KiB |
|
After Width: | Height: | Size: 440 KiB |
|
After Width: | Height: | Size: 156 KiB |
|
After Width: | Height: | Size: 85 KiB |
|
After Width: | Height: | Size: 893 KiB |
|
After Width: | Height: | Size: 287 KiB |
|
After Width: | Height: | Size: 769 KiB |