mirror of
https://github.com/run-llama/llama_cloud_services.git
synced 2026-07-01 21:44:37 -04:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 188a974974 | |||
| f470c1b861 | |||
| 008e87b75d | |||
| 7dd9672bf0 | |||
| bbce13e862 |
@@ -19,7 +19,12 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This helps provide a solution to the common chunking problem of retrieving chunks that are only subsets of the entire section you're meant to retrieve."
|
||||
"This helps provide a solution to the common chunking problem of retrieving chunks that are only subsets of the entire section you're meant to retrieve.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,18 +37,6 @@
|
||||
"Install core packages and download relevant files. Here we load some popular ICLR 2024 papers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "71bd0714-324f-48b3-8a93-72c6c3a10b53",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -51,8 +44,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-index\n",
|
||||
"!pip install llama-index-core\n",
|
||||
"!pip install \"llama-index>=0.13.0,<0.14.0\" \"llama-index-vector-stores-chroma>=0.5.1,<0.6.0\"\n",
|
||||
"!pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
@@ -101,48 +93,7 @@
|
||||
"execution_count": null,
|
||||
"id": "80137d15-f22b-47eb-adce-ac295ced7e71",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mkdir: iclr_docs: File exists\n",
|
||||
"--2024-11-10 16:18:56-- https://openreview.net/pdf?id=VTF8yNQM66\n",
|
||||
"Resolving openreview.net (openreview.net)... 35.184.86.251\n",
|
||||
"Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 2680380 (2.6M) [application/pdf]\n",
|
||||
"Saving to: ‘iclr_docs/swebench.pdf’\n",
|
||||
"\n",
|
||||
"iclr_docs/swebench. 100%[===================>] 2.56M 7.22MB/s in 0.4s \n",
|
||||
"\n",
|
||||
"2024-11-10 16:18:57 (7.22 MB/s) - ‘iclr_docs/swebench.pdf’ saved [2680380/2680380]\n",
|
||||
"\n",
|
||||
"--2024-11-10 16:18:57-- https://openreview.net/pdf?id=hSyW5go0v8\n",
|
||||
"Resolving openreview.net (openreview.net)... 35.184.86.251\n",
|
||||
"Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 1244749 (1.2M) [application/pdf]\n",
|
||||
"Saving to: ‘iclr_docs/selfrag.pdf’\n",
|
||||
"\n",
|
||||
"iclr_docs/selfrag.p 100%[===================>] 1.19M 4.21MB/s in 0.3s \n",
|
||||
"\n",
|
||||
"2024-11-10 16:18:58 (4.21 MB/s) - ‘iclr_docs/selfrag.pdf’ saved [1244749/1244749]\n",
|
||||
"\n",
|
||||
"--2024-11-10 16:18:58-- https://openreview.net/pdf?id=c5pwL0Soay\n",
|
||||
"Resolving openreview.net (openreview.net)... 35.184.86.251\n",
|
||||
"Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 4775879 (4.6M) [application/pdf]\n",
|
||||
"Saving to: ‘iclr_docs/metra.pdf’\n",
|
||||
"\n",
|
||||
"iclr_docs/metra.pdf 100%[===================>] 4.55M 4.06MB/s in 1.1s \n",
|
||||
"\n",
|
||||
"2024-11-10 16:19:00 (4.06 MB/s) - ‘iclr_docs/metra.pdf’ saved [4775879/4775879]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!mkdir \"{data_dir}\"\n",
|
||||
"for url, paper in zip(urls, papers):\n",
|
||||
@@ -168,8 +119,8 @@
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")\n",
|
||||
"llm = OpenAI(model=\"gpt-4o\")\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\", api_key=\"sk-...\")\n",
|
||||
"llm = OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
"\n",
|
||||
"Settings.embed_model = embed_model\n",
|
||||
"Settings.llm = llm"
|
||||
@@ -192,7 +143,15 @@
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(result_type=\"markdown\")"
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -201,30 +160,56 @@
|
||||
"id": "f9d6f0e8-323e-4786-a4a8-e393441ecd61",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 0%| | 0/3 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 827f328d-b72e-4b70-8b4b-47dbba859d69\n",
|
||||
"Started parsing the file under job_id d3104cd5-731e-4def-bdbc-889e8731989c\n",
|
||||
"Started parsing the file under job_id 6046274e-e522-46af-9185-3c036e9c3ad6\n"
|
||||
"Started parsing the file under job_id d8f0df2d-5b55-4e4f-bbe9-81cf4b8a4782\n",
|
||||
"Started parsing the file under job_id 6aef247f-f548-43f5-9ddb-cf8ba8373130\n",
|
||||
"Started parsing the file under job_id 5c1c4baf-fa43-4ed4-b671-16c45f99461c\n",
|
||||
"..."
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 67%|██████▋ | 2/3 [01:40<00:46, 46.97s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"....."
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 100%|██████████| 3/3 [05:49<00:00, 116.59s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"paper_dicts = {}\n",
|
||||
"\n",
|
||||
"paths_to_parse = []\n",
|
||||
"for paper_path in papers:\n",
|
||||
" paper_base = Path(paper_path).stem\n",
|
||||
" full_paper_path = str(Path(data_dir) / paper_path)\n",
|
||||
" md_json_objs = parser.get_json_result(full_paper_path)\n",
|
||||
" json_dicts = md_json_objs[0][\"pages\"]\n",
|
||||
" paper_dicts[paper_path] = {\n",
|
||||
" \"paper_path\": full_paper_path,\n",
|
||||
" \"json_dicts\": json_dicts,\n",
|
||||
" }"
|
||||
" paths_to_parse.append(full_paper_path)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"results = await parser.aparse(paths_to_parse)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -234,44 +219,7 @@
|
||||
"source": [
|
||||
"#### Get Text Nodes\n",
|
||||
"\n",
|
||||
"Convert the dictionary above into TextNode objects that we can put into a vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18c24174-05ce-417f-8dd2-79c3f375db03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import Optional"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e331dfe-a627-4e23-8c57-70ab1d9342e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NOTE: these are utility functions to sort the dumped images by the page number\n",
|
||||
"# (they are formatted like \"{uuid}-page-{page_num}.jpg\")\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_page_number(file_name):\n",
|
||||
" match = re.search(r\"-page-(\\d+)\\.jpg$\", str(file_name))\n",
|
||||
" if match:\n",
|
||||
" return int(match.group(1))\n",
|
||||
" return 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_sorted_image_files(image_dir):\n",
|
||||
" \"\"\"Get image files sorted by page.\"\"\"\n",
|
||||
" raw_files = [f for f in list(Path(image_dir).iterdir()) if f.is_file()]\n",
|
||||
" sorted_files = sorted(raw_files, key=get_page_number)\n",
|
||||
" return sorted_files"
|
||||
"Using each result object, we can create a list of text nodes with metadata attached."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -281,21 +229,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from copy import deepcopy\n",
|
||||
"from pathlib import Path\n",
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# attach image metadata to the text nodes\n",
|
||||
"def get_text_nodes(json_dicts, paper_path):\n",
|
||||
"def get_text_nodes(result):\n",
|
||||
" \"\"\"Build one text node per parsed page, tagged with its page number and source path.\"\"\"\n",
|
||||
" nodes = []\n",
|
||||
"\n",
|
||||
" md_texts = [d[\"md\"] for d in json_dicts]\n",
|
||||
" md_texts = [page.md for page in result.pages]\n",
|
||||
"\n",
|
||||
" for idx, md_text in enumerate(md_texts):\n",
|
||||
" chunk_metadata = {\n",
|
||||
" \"page_num\": idx + 1,\n",
|
||||
" \"paper_path\": paper_path,\n",
|
||||
" \"paper_path\": result.file_name,\n",
|
||||
" }\n",
|
||||
" node = TextNode(\n",
|
||||
" text=md_text,\n",
|
||||
@@ -316,11 +263,28 @@
|
||||
"# this will combine all nodes from all papers into a single list\n",
|
||||
"all_text_nodes = []\n",
|
||||
"text_nodes_dict = {}\n",
|
||||
"for paper_path, paper_dict in paper_dicts.items():\n",
|
||||
" json_dicts = paper_dict[\"json_dicts\"]\n",
|
||||
" text_nodes = get_text_nodes(json_dicts, paper_dict[\"paper_path\"])\n",
|
||||
"for result in results:\n",
|
||||
" text_nodes = get_text_nodes(result)\n",
|
||||
" all_text_nodes.extend(text_nodes)\n",
|
||||
" text_nodes_dict[paper_path] = text_nodes"
|
||||
" text_nodes_dict[result.file_name] = text_nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e8fb9df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"106\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(len(all_text_nodes))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -442,18 +406,15 @@
|
||||
" The user will give the document text below.\n",
|
||||
" \n",
|
||||
" \"\"\"\n",
|
||||
" llm = llm or OpenAI(model=\"gpt-4o\")\n",
|
||||
" llm = llm or OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
" sllm = llm.as_structured_llm(SectionsOutput)\n",
|
||||
"\n",
|
||||
" chat_template = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" ChatMessage.from_str(system_prompt, \"system\"),\n",
|
||||
" ChatMessage.from_str(\"Document text: {doc_text}\", \"user\"),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" result = await llm.astructured_predict(\n",
|
||||
" SectionsOutput, chat_template, doc_text=doc_text\n",
|
||||
" )\n",
|
||||
" return result.sections\n",
|
||||
" messages = [\n",
|
||||
" ChatMessage(content=system_prompt, role=\"system\"),\n",
|
||||
" ChatMessage(content=f\"Document text: {doc_text}\", role=\"user\"),\n",
|
||||
" ]\n",
|
||||
" result = await sllm.achat(messages)\n",
|
||||
" return result.raw.sections\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def arefine_sections(\n",
|
||||
@@ -472,23 +433,20 @@
|
||||
" Given this, return the list of indexes that are valid. Do NOT include the indexes to be removed.\n",
|
||||
" \n",
|
||||
" \"\"\"\n",
|
||||
" llm = llm or OpenAI(model=\"gpt-4o\")\n",
|
||||
"\n",
|
||||
" chat_template = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" ChatMessage.from_str(system_prompt, \"system\"),\n",
|
||||
" ChatMessage.from_str(\"Sections in text:\\n\\n{sections}\", \"user\"),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" llm = llm or OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
" sllm = llm.as_structured_llm(ValidSections)\n",
|
||||
"\n",
|
||||
" section_texts = \"\\n\".join(\n",
|
||||
" [f\"{idx}: {json.dumps(s.dict())}\" for idx, s in enumerate(sections)]\n",
|
||||
" [f\"{idx}: {json.dumps(s.model_dump())}\" for idx, s in enumerate(sections)]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" result = await llm.astructured_predict(\n",
|
||||
" ValidSections, chat_template, sections=section_texts\n",
|
||||
" )\n",
|
||||
" valid_indexes = result.valid_indexes\n",
|
||||
" messages = [\n",
|
||||
" ChatMessage(content=system_prompt, role=\"system\"),\n",
|
||||
" ChatMessage(content=f\"Sections in text:\\n\\n{section_texts}\", role=\"user\"),\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" result = await sllm.achat(messages)\n",
|
||||
" valid_indexes = result.raw.valid_indexes\n",
|
||||
"\n",
|
||||
" new_sections = [s for idx, s in enumerate(sections) if idx in valid_indexes]\n",
|
||||
" return new_sections\n",
|
||||
@@ -514,17 +472,7 @@
|
||||
"execution_count": null,
|
||||
"id": "6e360a5c-29bd-4d86-9a21-f46013bab39a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████████████████████████████████████████████████████████████████| 51/51 [00:11<00:00, 4.35it/s]\n",
|
||||
"100%|██████████████████████████████████████████████████████████████████████| 30/30 [00:09<00:00, 3.05it/s]\n",
|
||||
"100%|██████████████████████████████████████████████████████████████████████| 25/25 [00:07<00:00, 3.22it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sections_dict = asyncio_run(acreate_sections(text_nodes_dict))"
|
||||
]
|
||||
@@ -538,36 +486,36 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[SectionOutput(section_name='1', section_title='INTRODUCTION', start_page_number=1, is_subsection=False, description='# 1 INTRODUCTION'),\n",
|
||||
" SectionOutput(section_name='2', section_title='BENCHMARK CONSTRUCTION', start_page_number=2, is_subsection=False, description='# BENCHMARK CONSTRUCTION'),\n",
|
||||
" SectionOutput(section_name='2.2', section_title='TASK FORMULATION', start_page_number=3, is_subsection=True, description='# 2.2 TASK FORMULATION'),\n",
|
||||
" SectionOutput(section_name='2.3', section_title='FEATURES OF SWE-BENCH', start_page_number=3, is_subsection=True, description='# 2.3 FEATURES OF SWE-BENCH'),\n",
|
||||
" SectionOutput(section_name='3', section_title='SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH', start_page_number=3, is_subsection=False, description='# 3 SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH'),\n",
|
||||
"[SectionOutput(section_name='1', section_title='Introduction', start_page_number=1, is_subsection=False, description='## 1 Introduction'),\n",
|
||||
" SectionOutput(section_name='2.2', section_title='TASK FORMULATION', start_page_number=3, is_subsection=True, description='## 2.2 TASK FORMULATION'),\n",
|
||||
" SectionOutput(section_name='2.3', section_title='FEATURES OF SWE-BENCH', start_page_number=3, is_subsection=True, description='## 2.3 FEATURES OF SWE-BENCH'),\n",
|
||||
" SectionOutput(section_name='3', section_title='SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH', start_page_number=3, is_subsection=False, description='## 3 SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH'),\n",
|
||||
" SectionOutput(section_name='4', section_title='EXPERIMENTAL SETUP', start_page_number=4, is_subsection=False, description='# 4 EXPERIMENTAL SETUP'),\n",
|
||||
" SectionOutput(section_name='4.1', section_title='RETRIEVAL-BASED APPROACH', start_page_number=4, is_subsection=True, description='# 4.1 RETRIEVAL-BASED APPROACH'),\n",
|
||||
" SectionOutput(section_name='4.2', section_title='INPUT FORMAT', start_page_number=5, is_subsection=True, description='# 4.2 INPUT FORMAT'),\n",
|
||||
" SectionOutput(section_name='4.3', section_title='MODELS', start_page_number=5, is_subsection=True, description='# 4.3 MODELS'),\n",
|
||||
" SectionOutput(section_name='4.1', section_title='RETRIEVAL-BASED APPROACH', start_page_number=4, is_subsection=True, description='## 4.1 RETRIEVAL-BASED APPROACH'),\n",
|
||||
" SectionOutput(section_name='4.2', section_title='INPUT FORMAT', start_page_number=5, is_subsection=True, description='## 4.2 INPUT FORMAT'),\n",
|
||||
" SectionOutput(section_name='4.3', section_title='MODELS', start_page_number=5, is_subsection=True, description='## 4.3 MODELS'),\n",
|
||||
" SectionOutput(section_name='5', section_title='RESULTS', start_page_number=5, is_subsection=False, description='# 5 RESULTS'),\n",
|
||||
" SectionOutput(section_name='5.1', section_title='A QUALITATIVE ANALYSIS OF SWE-LLAMA GENERATIONS', start_page_number=8, is_subsection=True, description='# 5.1 A QUALITATIVE ANALYSIS OF SWE-LLAMA GENERATIONS'),\n",
|
||||
" SectionOutput(section_name='6', section_title='RELATED WORK', start_page_number=8, is_subsection=False, description='# 6 RELATED WORK'),\n",
|
||||
" SectionOutput(section_name='7', section_title='DISCUSSION', start_page_number=9, is_subsection=False, description='# 7 DISCUSSION'),\n",
|
||||
" SectionOutput(section_name='8', section_title='ETHICS STATEMENT', start_page_number=10, is_subsection=False, description='# 8 ETHICS STATEMENT'),\n",
|
||||
" SectionOutput(section_name='9', section_title='REPRODUCIBILITY STATEMENT', start_page_number=10, is_subsection=False, description='# 9 REPRODUCIBILITY STATEMENT'),\n",
|
||||
" SectionOutput(section_name='10', section_title='ACKNOWLEDGEMENTS', start_page_number=10, is_subsection=False, description='# 10 ACKNOWLEDGEMENTS'),\n",
|
||||
" SectionOutput(section_name='A', section_title='BENCHMARK DETAILS', start_page_number=15, is_subsection=False, description='# A BENCHMARK DETAILS'),\n",
|
||||
" SectionOutput(section_name='A.1', section_title='HIGH LEVEL OVERVIEW', start_page_number=15, is_subsection=True, description='# A.1 HIGH LEVEL OVERVIEW'),\n",
|
||||
" SectionOutput(section_name='A.2', section_title='CONSTRUCTION PROCESS', start_page_number=16, is_subsection=True, description='# A.2 CONSTRUCTION PROCESS'),\n",
|
||||
" SectionOutput(section_name='A.3', section_title='Execution-Based Validation', start_page_number=18, is_subsection=True, description='# A.3 EXECUTION-BASED VALIDATION'),\n",
|
||||
" SectionOutput(section_name='A.5', section_title='Evaluation Test Set Characterization', start_page_number=20, is_subsection=True, description='# A.5 EVALUATION TEST SET CHARACTERIZATION'),\n",
|
||||
" SectionOutput(section_name='A.6', section_title='DEVELOPMENT SET CHARACTERIZATION', start_page_number=23, is_subsection=True, description='# A.6 DEVELOPMENT SET CHARACTERIZATION'),\n",
|
||||
" SectionOutput(section_name='B', section_title='ADDITIONAL DETAILS ON TRAINING SWE-LLAMA', start_page_number=24, is_subsection=False, description='# B ADDITIONAL DETAILS ON TRAINING SWE-LLAMA'),\n",
|
||||
" SectionOutput(section_name='B.1', section_title='TRAINING DETAILS', start_page_number=24, is_subsection=True, description='# B.1 TRAINING DETAILS'),\n",
|
||||
" SectionOutput(section_name='D', section_title='ADDITIONAL EXPERIMENTAL DETAILS', start_page_number=28, is_subsection=False, description='# D ADDITIONAL EXPERIMENTAL DETAILS'),\n",
|
||||
" SectionOutput(section_name='D.1', section_title='RETRIEVAL DETAILS', start_page_number=28, is_subsection=True, description='# D.1 RETRIEVAL DETAILS'),\n",
|
||||
" SectionOutput(section_name='D.2', section_title='INFERENCE SETTINGS', start_page_number=29, is_subsection=True, description='# D.2 INFERENCE SETTINGS'),\n",
|
||||
" SectionOutput(section_name='D.3', section_title='PROMPT TEMPLATE EXAMPLE', start_page_number=29, is_subsection=True, description='# D.3 PROMPT TEMPLATE EXAMPLE'),\n",
|
||||
" SectionOutput(section_name='E', section_title='Societal Impact', start_page_number=31, is_subsection=False, description='# E SOCIETAL IMPACT'),\n",
|
||||
" SectionOutput(section_name='F', section_title='In-Depth Analysis of SWE-Llama Generations', start_page_number=31, is_subsection=False, description='# F IN-DEPTH ANALYSIS OF SWE-LLAMA GENERATIONS')]"
|
||||
" SectionOutput(section_name='A.1', section_title='HIGH LEVEL OVERVIEW', start_page_number=15, is_subsection=True, description='### A.1 HIGH LEVEL OVERVIEW'),\n",
|
||||
" SectionOutput(section_name='A.2', section_title='CONSTRUCTION PROCESS', start_page_number=16, is_subsection=True, description='## A.2 CONSTRUCTION PROCESS'),\n",
|
||||
" SectionOutput(section_name='A.3', section_title='EXECUTION-BASED VALIDATION', start_page_number=18, is_subsection=True, description='### A.3 EXECUTION-BASED VALIDATION'),\n",
|
||||
" SectionOutput(section_name='A.4', section_title='EVALUATION PROCEDURE', start_page_number=19, is_subsection=True, description='## A.4 EVALUATION PROCEDURE'),\n",
|
||||
" SectionOutput(section_name='A.5', section_title='EVALUATION TEST SET CHARACTERIZATION', start_page_number=20, is_subsection=True, description='## A.5 EVALUATION TEST SET CHARACTERIZATION'),\n",
|
||||
" SectionOutput(section_name='A.6', section_title='DEVELOPMENT SET CHARACTERIZATION', start_page_number=23, is_subsection=True, description='## A.6 DEVELOPMENT SET CHARACTERIZATION'),\n",
|
||||
" SectionOutput(section_name='B.1', section_title='TRAINING DETAILS', start_page_number=24, is_subsection=True, description='## B.1 TRAINING DETAILS'),\n",
|
||||
" SectionOutput(section_name='C.1', section_title='RESULTS WITH “ORACLE” RETRIEVAL', start_page_number=24, is_subsection=True, description='## C.1 RESULTS WITH “ORACLE” RETRIEVAL'),\n",
|
||||
" SectionOutput(section_name='C.2', section_title='EVALUATION TEST SET', start_page_number=24, is_subsection=True, description='## C.2 EVALUATION TEST SET'),\n",
|
||||
" SectionOutput(section_name='C.3', section_title='GPT-4 EVALUATION SUBSET RESULTS', start_page_number=24, is_subsection=True, description='## C.3 GPT-4 EVALUATION SUBSET RESULTS'),\n",
|
||||
" SectionOutput(section_name='C.4', section_title='EXTENDED TEMPORAL ANALYSIS', start_page_number=25, is_subsection=True, description='## C.4 EXTENDED TEMPORAL ANALYSIS'),\n",
|
||||
" SectionOutput(section_name='C.5', section_title='F2P, P2P RATE ANALYSIS', start_page_number=25, is_subsection=True, description='## C.5 F2P, P2P RATE ANALYSIS'),\n",
|
||||
" SectionOutput(section_name='C.7', section_title='SOFTWARE ENGINEERING METRICS', start_page_number=27, is_subsection=True, description='## C.7 SOFTWARE ENGINEERING METRICS'),\n",
|
||||
" SectionOutput(section_name='D.1', section_title='RETRIEVAL DETAILS', start_page_number=28, is_subsection=True, description='## D.1 RETRIEVAL DETAILS'),\n",
|
||||
" SectionOutput(section_name='D.2', section_title='INFERENCE SETTINGS', start_page_number=29, is_subsection=True, description='## D.2 INFERENCE SETTINGS'),\n",
|
||||
" SectionOutput(section_name='D.3', section_title='PROMPT TEMPLATE EXAMPLE', start_page_number=29, is_subsection=True, description='## D.3 PROMPT TEMPLATE EXAMPLE')]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -576,7 +524,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sections_dict[\"swebench.pdf\"]"
|
||||
"sections_dict[\"iclr_docs/swebench.pdf\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -755,7 +703,7 @@
|
||||
"from llama_index.vector_stores.chroma import ChromaVectorStore\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"\n",
|
||||
"persist_dir = \"storage_chroma\"\n",
|
||||
"persist_dir = \"chroma_storage\"\n",
|
||||
"\n",
|
||||
"vector_store = ChromaVectorStore.from_params(\n",
|
||||
" collection_name=\"text_nodes\", persist_dir=persist_dir\n",
|
||||
@@ -805,7 +753,7 @@
|
||||
"source": [
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-4o\")"
|
||||
"llm = OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -833,6 +781,7 @@
|
||||
" FilterCondition,\n",
|
||||
")\n",
|
||||
"from llama_index.core.schema import NodeWithScore\n",
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def section_retrieve(query: str, verbose: bool = False) -> List[NodeWithScore]:\n",
|
||||
@@ -870,57 +819,6 @@
|
||||
" return all_section_nodes.values()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f721e770-ce4c-4511-96d5-8a89d16c7281",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
">> Identifying the right sections to retrieve\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n",
|
||||
">> Retrieving section: 2: BENCHMARK CONSTRUCTION\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nodes = section_retrieve(\n",
|
||||
" \"Give me a full overview of the benchmark details in SWE Bench\", verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e99eaa71-7d93-40c0-bba0-a9c983a6cbd3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'page_num': 15, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.1: HIGH LEVEL OVERVIEW'}\n",
|
||||
"{'page_num': 16, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.2: CONSTRUCTION PROCESS'}\n",
|
||||
"{'page_num': 17, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.2: CONSTRUCTION PROCESS'}\n",
|
||||
"{'page_num': 18, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.3: Execution-Based Validation'}\n",
|
||||
"{'page_num': 19, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.3: Execution-Based Validation'}\n",
|
||||
"{'page_num': 20, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 21, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 22, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 23, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.6: DEVELOPMENT SET CHARACTERIZATION'}\n",
|
||||
"{'page_num': 2, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': '2: BENCHMARK CONSTRUCTION', 'sub_section_id': '2: BENCHMARK CONSTRUCTION'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for n in nodes:\n",
|
||||
" print(n.node.metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -932,9 +830,9 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
">> Identifying the right sections to retrieve\n",
|
||||
">> Retrieving section: F: ADDITIONAL RESULTS\n",
|
||||
">> Retrieving section: 6: Conclusion\n",
|
||||
">> Retrieving section: 5: EXPERIMENTS\n",
|
||||
">> Retrieving section: F: ADDITIONAL RESULTS\n"
|
||||
">> Retrieving section: 5: EXPERIMENTS\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -955,11 +853,26 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'page_num': 21, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': 'F: ADDITIONAL RESULTS', 'sub_section_id': 'F.1: FULL QUALITATIVE RESULTS'}\n",
|
||||
"{'page_num': 22, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': 'F: ADDITIONAL RESULTS', 'sub_section_id': 'F.4: Additional Baselines'}\n",
|
||||
"{'page_num': 9, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 10, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 11, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 12, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 13, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 14, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 15, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 16, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': '6: Conclusion'}\n",
|
||||
"{'page_num': 17, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'C.1: Universality of Inner Product Decomposition'}\n",
|
||||
"{'page_num': 18, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'C.2: Lipschitz Constraint under the Temporal Distance Metric'}\n",
|
||||
"{'page_num': 19, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'C.2: Lipschitz Constraint under the Temporal Distance Metric'}\n",
|
||||
"{'page_num': 20, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'E.2: DADS'}\n",
|
||||
"{'page_num': 21, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'F.1: FULL QUALITATIVE RESULTS'}\n",
|
||||
"{'page_num': 22, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'F.4: ADDITIONAL BASELINES'}\n",
|
||||
"{'page_num': 23, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'G.1: Environments'}\n",
|
||||
"{'page_num': 24, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'G.2: IMPLEMENTATION DETAILS'}\n",
|
||||
"{'page_num': 25, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '6: Conclusion', 'sub_section_id': 'G.2: IMPLEMENTATION DETAILS'}\n",
|
||||
"{'page_num': 6, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '5: EXPERIMENTS', 'sub_section_id': '5: EXPERIMENTS'}\n",
|
||||
"{'page_num': 7, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '5: EXPERIMENTS', 'sub_section_id': '5.2: QUALITATIVE COMPARISON'}\n",
|
||||
"{'page_num': 8, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '5: EXPERIMENTS', 'sub_section_id': '5.3: QUANTITATIVE COMPARISON'}\n"
|
||||
"{'page_num': 8, 'paper_path': 'iclr_docs/metra.pdf', 'section_id': '5: EXPERIMENTS', 'sub_section_id': '5.3: Quantitative Comparison'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1027,10 +940,24 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
">> Identifying the right sections to retrieve\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n",
|
||||
">> Retrieving section: 5: RESULTS\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n",
|
||||
"In SWEBench, difficulty correlates with context length in a way that as the total context length increases, model performance tends to drop. This is observed across various models, including Claude 2, which shows a significant decrease in performance with longer context lengths. The models often struggle to localize the problematic code that needs updating when presented with a lot of code that may not be directly related to the issue at hand. This suggests that models can become distracted by additional context, which aligns with findings from other studies indicating that models may be sensitive to the relative location of target sequences. Even when increasing the maximum context size improves recall with respect to the oracle files, performance still drops, indicating that models are ineffective at localizing the necessary code changes.\n"
|
||||
">> Retrieving section: 3: SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH\n",
|
||||
">> Retrieving section: 4: EXPERIMENTAL SETUP\n",
|
||||
"Key findings about how difficulty correlates with context length\n",
|
||||
"\n",
|
||||
"- Performance falls as total input/context size grows. As the amount of code and other context provided to models increases, their ability to localize and produce correct edits drops noticeably (this behavior was observed across multiple models, e.g., Claude 2 and others).\n",
|
||||
"\n",
|
||||
"- Extra (irrelevant) context distracts models. When models are given a lot of code that is unrelated to the actual edit, they frequently struggle to find the problematic lines that need changing. This sensitivity includes the relative location of the target code within the larger context.\n",
|
||||
"\n",
|
||||
"- Increasing retriever recall doesn't fix it. Expanding retrieval windows (to include more files and therefore raise oracle recall) can actually hurt end-to-end performance because models become less effective at pinpointing the needed edits amid the extra material.\n",
|
||||
"\n",
|
||||
"- Collapsing context around the true edits helps. An ablation that collapses retrieved files to only the lines actually modified in the reference patch (±15 lines) improved results — for example, one model’s resolved rate rose from 4.8% to 5.9%, and another increased from ~1.3% to 3.4% — showing that concentrating context on the most relevant snippets makes the task easier.\n",
|
||||
"\n",
|
||||
"- Finetuned models are sensitive to context-distribution shifts. Models fine-tuned on tightly scoped (oracle) contexts performed worse when given BM25-retrieved context that contained many irrelevant files, indicating that training with one style of context can reduce robustness to different retrieval outputs.\n",
|
||||
"\n",
|
||||
"Implications\n",
|
||||
"- Better retrieval or context-compression methods (e.g., more precise retrieval, collapsing to edited regions, or preprocessing to highlight likely relevant locations) are likely more useful than simply increasing context size.\n",
|
||||
"- Robust model behavior requires not just larger windows but mechanisms for localization and filtering of relevant code within long contexts.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1052,18 +979,98 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
">> Identifying the right sections to retrieve\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n",
|
||||
">> Retrieving section: 2: BENCHMARK CONSTRUCTION\n",
|
||||
">> Retrieving section: A: BENCHMARK DETAILS\n",
|
||||
"SWE-bench is a benchmark designed to evaluate language models in a realistic software engineering setting by using GitHub issues and pull requests from popular repositories. The benchmark involves generating a pull request that addresses a given issue and passes related tests. The construction of SWE-bench involves a three-stage pipeline:\n",
|
||||
">> Retrieving section: 10: ACKNOWLEDGEMENTS\n",
|
||||
">> Retrieving section: 1: Introduction\n",
|
||||
">> Retrieving section: 3: SWE-LLAMA: FINE-TUNING CODELLAMA FOR SWE-BENCH\n",
|
||||
"High-level summary\n",
|
||||
"- SWE-bench is a repository-scale, execution-validated benchmark of real GitHub issues paired with merged pull-request solutions. Each task gives a snapshot of a real codebase plus an issue description; the model must produce a patch that, when applied, makes the repository pass the tests that verify the issue was addressed.\n",
|
||||
"- The benchmark emphasizes realistic, hard software-engineering problems: large codebases, multi-file edits, long issue descriptions, and unit tests used for automatic verification.\n",
|
||||
"\n",
|
||||
"1. **Repo Selection and Data Scraping**: Pull requests are collected from 12 popular open-source Python repositories on GitHub, resulting in approximately 90,000 PRs. These repositories are chosen for their better maintenance, clear contributor guidelines, and comprehensive test coverage.\n",
|
||||
"Data sources and collection\n",
|
||||
"- Candidate PRs are sourced from popular Python projects (selected from highly downloaded PyPI packages and mapped to their GitHub repositories). Repositories are filtered to ensure permissible licenses.\n",
|
||||
"- Pull requests are collected via the GitHub API and then filtered automatically.\n",
|
||||
"\n",
|
||||
"2. **Attribute-Based Filtering**: Candidate tasks are created by selecting merged PRs that resolve a GitHub issue and contribute tests. This indicates that the user likely added tests to verify the resolution of the issue.\n",
|
||||
"Task-instance selection criteria\n",
|
||||
"A PR becomes a candidate task only if it satisfies all of:\n",
|
||||
"- Status = merged (the PR was accepted).\n",
|
||||
"- The PR resolves one or more GitHub issues (detected via links like “fixes #N” in title/body/commits).\n",
|
||||
"- The PR introduces or edits test files (file paths containing test-related keywords).\n",
|
||||
"Only candidates that pass execution-based validation are kept.\n",
|
||||
"\n",
|
||||
"3. **Execution-Based Filtering**: For each candidate task, the PR's test content is applied, and test results are logged before and after applying the PR's other content. Tasks are filtered out if they do not have at least one test that changes from fail to pass or if they result in installation or runtime errors.\n",
|
||||
"Task-instance components\n",
|
||||
"Each task instance encodes:\n",
|
||||
"- Codebase reference C: repo owner/name and the base commit (mirrored repositories are created so code can be retrieved reproducibly).\n",
|
||||
"- Problem statement P: aggregated issue titles and descriptions and any issue/PR comments up to the PR’s first commit (no post-solution comments that would leak the fix).\n",
|
||||
"- Tests T: the tests introduced/edited by the PR (extracted from the PR diff and stored as a .patch).\n",
|
||||
"- Solution δ (gold patch): the PR’s code changes excluding test edits (stored as a .patch).\n",
|
||||
"- Metadata fields: base_commit, created_at, instance_id, issue_numbers, repo, pull_number, version, env_install_commit, hints_text (collected comments), and cached test result mappings like FAIL_TO_PASS and PASS_TO_PASS.\n",
|
||||
"\n",
|
||||
"The benchmark is designed to be extensible, allowing for updates with new task instances as new language models are released. It includes a robust framework for execution-based evaluation, ensuring that generated solutions can be verified by running unit tests. SWE-bench also provides a training dataset, SWE-bench-train, and fine-tuned models like SWE-Llama 7b and 13b, which are based on the CodeLlama model. These models are evaluated on their ability to resolve issues, with SWE-Llama 13b showing competitive performance in some settings.\n"
|
||||
"Execution-based validation (quality control)\n",
|
||||
"- Virtual execution contexts are created per repository release version (manual inspection of README/contributing to determine Python version, dependencies, install commands). Conda environments are used.\n",
|
||||
"- For each candidate instance the pipeline:\n",
|
||||
" 1. Checks out the base commit.\n",
|
||||
" 2. Installs the codebase in the corresponding env.\n",
|
||||
" 3. Applies the test patch T and runs tests (log_pre).\n",
|
||||
" 4. Applies the solution patch δ and runs tests again (log_post).\n",
|
||||
"- Candidates are discarded if any step fails (checkout, install, apply patch, test run).\n",
|
||||
"- Instances are retained only if at least one test changes from fail → pass (a true FAIL_TO_PASS) and if there are no trivial issues (e.g., ImportError or AttributeError in log_pre that indicate missing dependency/name issues).\n",
|
||||
"- Instances whose tests exercise newly created functions/classes (i.e., tests requiring names introduced by δ) are excluded because they would be impossible to solve from the problem statement alone.\n",
|
||||
"\n",
|
||||
"Task-instance format and artifacts\n",
|
||||
"- Finalized instances are saved in a single JSON file (task metadata and patch contents are included as patch-format strings).\n",
|
||||
"- For each instance the validation engine caches parsed test-to-status mappings for log_pre/log_post and creates ground-truth lists: FAIL_TO_PASS, PASS_TO_PASS (used during evaluation to check both that the fix was implemented and that prior behavior is preserved).\n",
|
||||
"- Mirrors of original repositories are created and stored to preserve exact base commits and enable reproducible checkout.\n",
|
||||
"\n",
|
||||
"Evaluation procedure (how models are scored)\n",
|
||||
"- Model input: problem statement P and the codebase C (usually limited by retrieval/long-context strategy). The model must generate a single .patch (a git/unified-diff style patch).\n",
|
||||
"- Per predicted patch the evaluation harness:\n",
|
||||
" 1. Resets repo to base commit.\n",
|
||||
" 2. Activates the executable context for the instance version.\n",
|
||||
" 3. Installs the codebase.\n",
|
||||
" 4. Applies the test patch T.\n",
|
||||
" 5. Attempts to apply the predicted patch \\hat{δ}. If applying fails, an automatic \"patch-fix\" step tries to repair the patch (e.g., strip extraneous context lines and recalculate headers); if it still fails the prediction is scored as failure.\n",
|
||||
" 6. Runs the repository’s test command to generate log_{\\hat{δ}}.\n",
|
||||
" 7. Parses log_{\\hat{δ}} into a test-to-status mapping using repository-specific parsers.\n",
|
||||
" 8. Declares the task solved only if all tests listed in FAIL_TO_PASS and PASS_TO_PASS have status = pass in log_{\\hat{δ}}.\n",
|
||||
"- The principal metric is % Resolved: fraction of task instances fully solved (all required tests pass).\n",
|
||||
"\n",
|
||||
"Patch-fixing and robustness\n",
|
||||
"- If a generated patch does not apply, the harness attempts an automated repair (e.g., removing context lines, fixing header offsets) before giving up. Applied-but-broken patches that then fail tests are classified according to pass/fail patterns (Resolved, Breaking Resolved, Partially Resolved, Work-in-Progress, No-Op, Regression) to provide finer-grained analysis.\n",
|
||||
"\n",
|
||||
"Dataset scale and characterization\n",
|
||||
"- Raw crawl: ~93k PRs across selected repositories; after conversion/filters and execution validation the final evaluation set contains 2,294 task instances.\n",
|
||||
"- Instances come from 12 widely used Python repositories with varied sizes and purposes (e.g., scikit-learn, Django, matplotlib, requests, pytest, sympy, astropy, etc.).\n",
|
||||
"- Typical instance properties: long problem descriptions (median ~140 words), large repositories (median ~thousands of files and hundreds of thousands of lines), and reference edits that usually touch ~1–2 files, edit a few functions, and modify a few dozen lines on average.\n",
|
||||
"- Tests: each instance has at least one FAIL_TO_PASS; many instances include many PASS_TO_PASS tests for regression protection (median tens to hundreds of pass-to-pass tests).\n",
|
||||
"\n",
|
||||
"Development set, train set, and extensions\n",
|
||||
"- A smaller development set (~225 instances, >10% of the main set) is provided for tuning and debugging.\n",
|
||||
"- A separate SWE-bench-train dataset (19k non-testing task instances from many repos) was prepared for fine-tuning models; fine-tuned models were released (SWE-Llama 7B and 13B) to study open-model performance on long contexts.\n",
|
||||
"- The collection pipeline and mirror strategy were designed to be easily extendable so the benchmark can be updated continuously with new PRs and support additional languages or repos.\n",
|
||||
"\n",
|
||||
"Reproducibility and release commitments\n",
|
||||
"- The codebase used to collect, validate, and evaluate task instances is organized and documented; mirrors and the JSON of task instances are provided so others can reproduce experiments.\n",
|
||||
"- Execution contexts, validation logs, and ground-truth test mappings are cached to avoid re-running expensive validation at evaluation time.\n",
|
||||
"- Plans include open-sourcing the task instances, collection/evaluation infrastructure, training data used for fine-tuning, and model weights along with documentation.\n",
|
||||
"\n",
|
||||
"Design decisions and safeguards\n",
|
||||
"- Using merged PRs that added tests provides a strong ground-truth signal that the PR truly solved the issue and allowed for reproducible verification.\n",
|
||||
"- Excluding instances with trivial dependency/name errors or tests that require newly-introduced symbol names ensures tasks are solvable from the given P + C without hidden knowledge.\n",
|
||||
"- Mirroring repositories preserves commit history and avoids breakage from later upstream edits.\n",
|
||||
"\n",
|
||||
"What solving a task means (concrete criterion)\n",
|
||||
"- A generated patch must apply and, after applying the repository’s tests, every test that the validation flagged as verifying the issue (FAIL_TO_PASS) must now pass, and all tests that previously passed but were intended to remain passing (PASS_TO_PASS) must still pass. Only then is the task counted as solved.\n",
|
||||
"\n",
|
||||
"Utility and intended uses\n",
|
||||
"- The benchmark measures model ability to: localize defects, reason across a large codebase, produce multi-line and multi-file edits in patch format, and use execution feedback (tests) as verification.\n",
|
||||
"- It is intended both as a hard evaluation for current models and as a development target for models and systems that perform repository-scale code edits, retrieval from large codebases, iterative editing with execution feedback, or agent-style multi-step repair.\n",
|
||||
"\n",
|
||||
"Limitations to be aware of\n",
|
||||
"- The benchmark focuses on repositories with permissive licenses and decent test coverage (popular projects), so it emphasizes bug fixes and features that were covered by tests and merged in those projects.\n",
|
||||
"- Some tasks that require creating new symbol names first introduced in the solution are excluded because they would not be solvable from the baseline inputs.\n",
|
||||
"- Execution environments are created per release version (manual aspects exist), and some instances are discarded when installation or environment setup cannot be reliably reproduced.\n",
|
||||
"\n",
|
||||
"Overall, SWE-bench provides a large, execution-validated, reproducible suite of real-world repository-scale code-editing tasks that require understanding long contexts and producing correct patch-format edits verified by the project’s own tests.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1074,34 +1081,6 @@
|
||||
"print(str(response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6d747bf8-0ed2-4c10-8108-9d0e8d53a4fb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'page_num': 15, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.1: HIGH LEVEL OVERVIEW'}\n",
|
||||
"{'page_num': 16, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.2: CONSTRUCTION PROCESS'}\n",
|
||||
"{'page_num': 17, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.2: CONSTRUCTION PROCESS'}\n",
|
||||
"{'page_num': 18, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.3: Execution-Based Validation'}\n",
|
||||
"{'page_num': 19, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.3: Execution-Based Validation'}\n",
|
||||
"{'page_num': 20, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 21, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 22, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.5: Evaluation Test Set Characterization'}\n",
|
||||
"{'page_num': 23, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': 'A: BENCHMARK DETAILS', 'sub_section_id': 'A.6: DEVELOPMENT SET CHARACTERIZATION'}\n",
|
||||
"{'page_num': 2, 'paper_path': 'iclr_docs/swebench.pdf', 'section_id': '2: BENCHMARK CONSTRUCTION', 'sub_section_id': '2: BENCHMARK CONSTRUCTION'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for n in response.source_nodes:\n",
|
||||
" print(n.metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -1113,20 +1092,76 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
">> Identifying the right sections to retrieve\n",
|
||||
">> Retrieving section: F: ADDITIONAL RESULTS\n",
|
||||
">> Retrieving section: 6: Conclusion\n",
|
||||
">> Retrieving section: 5: EXPERIMENTS\n",
|
||||
">> Retrieving section: F: ADDITIONAL RESULTS\n",
|
||||
"The additional experimental results in the METRA paper include several key findings:\n",
|
||||
">> Retrieving section: 5: EXPERIMENTS\n",
|
||||
"Here are the additional experimental results and analyses reported.\n",
|
||||
"\n",
|
||||
"1. **Full Qualitative Results**: METRA discovers diverse locomotion behaviors across different environments, including state-based Ant and HalfCheetah, and pixel-based Quadruped and Humanoid. The results are consistent across multiple random seeds, indicating robustness in behavior discovery.\n",
|
||||
"1) Full qualitative results (complete skill behaviors, 8 seeds)\n",
|
||||
"- Environments: state-based Ant and HalfCheetah; pixel-based Quadruped and Humanoid.\n",
|
||||
"- Skill parameterizations used in these visualizations: 2-D continuous skills for Ant and Humanoid, 4-D continuous skills for Quadruped, 16 discrete skills for HalfCheetah.\n",
|
||||
"- Main finding: across 8 random seeds METRA consistently discovers diverse locomotion behaviors (radial/x-y coverage, different locomotion modes) regardless of seed. The paper shows multiple sample trajectories per seed to illustrate robustness and diversity.\n",
|
||||
"\n",
|
||||
"2. **Latent Space Visualization**: METRA effectively captures the most temporally spread-out dimensions in the state space, such as x-y coordinates, in its latent space. This is demonstrated in both state-based and pixel-based environments, with higher-dimensional latent spaces capturing more diverse behaviors.\n",
|
||||
"2) Latent-space visualization\n",
|
||||
"- Setup: METRA trained with 2-D continuous latent space on Ant (state inputs) and Humanoid (pixel inputs).\n",
|
||||
"- Observation: the learned representation φ(s) captures the agent’s x-y coordinates in the 2-D latent space in both Ant and Humanoid. The learned φ trajectories align with the x-y trajectories, indicating METRA finds the temporally most spread-out manifold (x-y plane) even from pixels.\n",
|
||||
"- Note: with higher-dimensional or discrete latent spaces, METRA captures more diverse, non-linear behaviors beyond simple locomotion.\n",
|
||||
"\n",
|
||||
"3. **Ablation Study of Latent Space Sizes**: The study shows that increasing the size of the latent space generally enhances the diversity of skills learned by METRA. Different dimensions of continuous and discrete skills were tested on Ant and HalfCheetah.\n",
|
||||
"3) Ablation: effect of latent-space size on learned skills\n",
|
||||
"- Latent-space sizes tested: 1-D, 2-D, 4-D continuous; discrete sets of sizes {2}, {4}, {8}, {16}, {24}.\n",
|
||||
"- Environments: Ant and HalfCheetah.\n",
|
||||
"- Result: skill diversity increases as the capacity (dimensionality / cardinality) of Z grows.\n",
|
||||
" - 1-D: simple linear/one-dimensional coverage\n",
|
||||
" - 2-D: radial coverage / 2-D spread\n",
|
||||
" - 4-D: more complex radial / richer behaviors\n",
|
||||
" - Discrete increases produce progressively more distinct discrete behaviors (more segments, more diverse skill classes)\n",
|
||||
"- Conclusion: METRA maximizes state coverage under latent capacity, so increasing Z’s capacity yields more diverse discovered behaviors.\n",
|
||||
"\n",
|
||||
"4. **Comparison with Additional Baselines**: METRA was compared with DGPO, a method focused on finding diverse behaviors that maximize task rewards. The comparison was conducted in a controlled Markov process setting without external rewards, using only intrinsic rewards.\n",
|
||||
"4) Additional baseline: DGPO comparison (discrete-skill comparison; 4 seeds)\n",
|
||||
"- Experimental setup: DIAYN, DGPO, and METRA were trained with 16 discrete skills for 10,000 epochs (≈16M environment steps).\n",
|
||||
"- Metrics reported: policy state coverage and total state coverage (means ± std).\n",
|
||||
"- Results (Table reproduced):\n",
|
||||
" - HalfCheetah (policy state coverage)\n",
|
||||
" - DIAYN: 6.75 ± 2.22\n",
|
||||
" - DGPO: 6.75 ± 2.06\n",
|
||||
" - METRA: 186.75 ± 16.21\n",
|
||||
" - HalfCheetah (total state coverage)\n",
|
||||
" - DIAYN: 19.50 ± 3.87\n",
|
||||
" - DGPO: 22.25 ± 5.85\n",
|
||||
" - METRA: 177.75 ± 17.10\n",
|
||||
" - Ant (policy state coverage)\n",
|
||||
" - DIAYN: 11.25 ± 5.44\n",
|
||||
" - DGPO: 7.00 ± 3.83\n",
|
||||
" - METRA: 1387.75 ± 77.38\n",
|
||||
" - Ant (total state coverage)\n",
|
||||
" - DIAYN: 107.75 ± 17.00\n",
|
||||
" - DGPO: 121.50 ± 4.36\n",
|
||||
" - METRA: 6313.25 ± 747.92\n",
|
||||
"- Interpretation given: DGPO (which maximizes a metric-agnostic KL-style objective in discrete Z) still produces limited state coverage similar to DIAYN, whereas METRA (a metric-aware Wasserstein formulation) achieves substantially greater coverage in these locomotion environments.\n",
|
||||
"\n",
|
||||
"These results highlight METRA's ability to discover diverse and meaningful behaviors in various environments, its effective use of latent spaces, and its performance relative to other methods.\n"
|
||||
"5) Skill examples / qualitative descriptions by latent size\n",
|
||||
"- A tabulated description shows how skills change qualitatively with latent-size choices (examples):\n",
|
||||
" - Ant (continuous Z):\n",
|
||||
" - 1-D: linearly increasing coverage\n",
|
||||
" - 2-D: radial coverage with 2-D spread\n",
|
||||
" - 4-D: more complex radial coverage\n",
|
||||
" - Ant / HalfCheetah (discrete Z):\n",
|
||||
" - Discrete 2 / 4 / 8 / 16 / 24 skills: progressively more segments and more diverse behaviors, with 24 discrete skills showing the highest diversity.\n",
|
||||
"- The paper notes that with discrete Z METRA can discover qualitatively distinct behaviors such as flips or static postures (in addition to locomotion) when capacity is sufficient.\n",
|
||||
"\n",
|
||||
"6) Details on coverage metrics, datasets, and protocol used in these additional results\n",
|
||||
"- Policy state coverage: computed by sampling 48 deterministic trajectories using 48 randomly sampled skills at each evaluation epoch (used for skill-discovery method policy coverage plots).\n",
|
||||
"- Queue state coverage: computed from most recent 100,000 training trajectories (used for some comparisons).\n",
|
||||
"- Total state coverage: computed from the entire set of training trajectories up to the current epoch (used as a generous metric for pure-exploration baselines).\n",
|
||||
"- For locomotion coverage counting: x-y bins of 1×1 are counted for Ant, Quadruped, Humanoid; x bins for HalfCheetah. Kitchen uses task success counts for pre-defined subtasks.\n",
|
||||
"- Seeds: most qualitative and skill-discovery comparisons use 8 seeds; the DGPO comparison reported used 4 seeds.\n",
|
||||
"\n",
|
||||
"7) Additional notes and takeaways from the extra experiments\n",
|
||||
"- METRA’s learned φ(s) is effective for zero-shot goal selection because φ preserves temporal distances; the latent difference φ(g) − φ(s) gives a direction in Z to reach a goal.\n",
|
||||
"- Increasing latent capacity helps but requires choosing continuous vs. discrete Z appropriately for the desired types of behaviors.\n",
|
||||
"- The DGPO comparison further supports that metric-aware objectives (METRA) lead to substantially higher state coverage than metric-agnostic mutual-information/KL-style objectives.\n",
|
||||
"\n",
|
||||
"If you want, I can extract and present the specific numeric tables and captions (e.g., the full Table 1 numbers above) in CSV or another concise format, or summarize the visual findings into representative example trajectories for each latent-size setting.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1140,9 +1175,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_index_v3",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_index_v3"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -6,7 +6,12 @@
|
||||
"source": [
|
||||
"# LlamaParse Agent\n",
|
||||
"\n",
|
||||
"This demo walks through using an OpenAI Agent with [LlamaParse](https://cloud.llamaindex.ai)."
|
||||
"This demo walks through using an OpenAI Agent with [LlamaParse](https://cloud.llamaindex.ai).\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -22,7 +27,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-cloud-services llama-index llama-index-postprocessor-sbert-rerank"
|
||||
"!pip install llama-cloud-services \"llama-index>=0.13.0,<0.14.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -48,7 +53,7 @@
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-small\")\n",
|
||||
"Settings.llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.2)"
|
||||
"Settings.llm = OpenAI(model=\"gpt-5-mini\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -83,9 +88,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"from sympy import O\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -98,53 +109,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 81251f39-01be-434e-99e8-1c1b83b82098\n"
|
||||
"Started parsing the file under job_id cd1958b0-b260-4a63-aa74-bf829a0c125f\n",
|
||||
".."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents = await parser.aload_data(\"paper.pdf\")"
|
||||
"result = await parser.aparse(\"paper.pdf\")\n",
|
||||
"documents = result.get_markdown_documents(split_by_page=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Embeddings have been explicitly disabled. Using MockEmbedding.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"41it [00:00, 26765.21it/s]\n",
|
||||
"100%|██████████| 41/41 [00:13<00:00, 2.98it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"from llama_index.core.node_parser import (\n",
|
||||
" MarkdownElementNodeParser,\n",
|
||||
" SentenceSplitter,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# explicitly extract tables with the MarkdownElementNodeParser\n",
|
||||
"node_parser = MarkdownElementNodeParser(num_workers=8)\n",
|
||||
"nodes = node_parser.get_nodes_from_documents(documents)\n",
|
||||
"nodes, objects = node_parser.get_nodes_and_objects(nodes)\n",
|
||||
"from llama_index.core.node_parser import SentenceSplitter\n",
|
||||
"\n",
|
||||
"# Chain splitters to ensure chunk size requirements are met\n",
|
||||
"nodes = SentenceSplitter(chunk_size=512, chunk_overlap=20).get_nodes_from_documents(\n",
|
||||
" nodes\n",
|
||||
"nodes = SentenceSplitter(chunk_size=2048, chunk_overlap=256).get_nodes_from_documents(\n",
|
||||
" documents\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -173,30 +158,41 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.agent.openai import OpenAIAgent\n",
|
||||
"from llama_index.core.tools import QueryEngineTool, ToolMetadata\n",
|
||||
"from llama_index.postprocessor.colbert_rerank import ColbertRerank\n",
|
||||
"from llama_index.core.agent import FunctionAgent\n",
|
||||
"from llama_index.core.tools import QueryEngineTool\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" QueryEngineTool(\n",
|
||||
" QueryEngineTool.from_defaults(\n",
|
||||
" vector_index.as_query_engine(\n",
|
||||
" similarity_top_k=8, node_postprocessors=[ColbertRerank(top_n=3)]\n",
|
||||
" ),\n",
|
||||
" metadata=ToolMetadata(\n",
|
||||
" name=\"search\",\n",
|
||||
" description=\"Search the document, pass the entire user message in the query\",\n",
|
||||
" similarity_top_k=4,\n",
|
||||
" ),\n",
|
||||
" name=\"query\",\n",
|
||||
" description=\"Send a query that requires only a subset of the top-k documents to be considered\",\n",
|
||||
" ),\n",
|
||||
" QueryEngineTool(\n",
|
||||
" QueryEngineTool.from_defaults(\n",
|
||||
" summary_index.as_query_engine(),\n",
|
||||
" metadata=ToolMetadata(\n",
|
||||
" name=\"summarize\",\n",
|
||||
" description=\"Summarize the document using the user message\",\n",
|
||||
" ),\n",
|
||||
" name=\"query_all_docs\",\n",
|
||||
" description=\"Send a query that requires all documents to be considered\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"agent = OpenAIAgent.from_tools(tools=tools, verbose=True)"
|
||||
"agent = FunctionAgent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=Settings.llm,\n",
|
||||
" system_prompt=\"You are a helpful assistant that can answer questions about the paper.\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.workflow import Context\n",
|
||||
"\n",
|
||||
"# Context to persist the agent session\n",
|
||||
"ctx = Context(agent)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -208,18 +204,40 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Added user message to memory: What is the summary of the paper?\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: summarize with args: {\"input\":\"summary\"}\n",
|
||||
"Got output: The research focuses on developing Multimodal Large Language Models (MLLMs) by incorporating image-caption, interleaved image-text, and text-only data for pre-training. It highlights the importance of factors like the image encoder, resolution, and token count, while downplaying the design of the vision-language connector. With models scaling up to 30B parameters, the MM1 family demonstrates impressive performance in pre-training metrics and competitive outcomes on diverse multimodal benchmarks. It demonstrates abilities such as in-context learning and multi-image reasoning, aiming to provide valuable insights for creating MLLMs that benefit the research community.\n",
|
||||
"========================\n",
|
||||
"\n"
|
||||
"Calling tool query_all_docs with args {'input': 'Provide the summary of the paper (concise abstract-like summary).'}\n",
|
||||
"Tool call query_all_docs({'input': 'Provide the summary of the paper (concise abstract-like summary).'}) returned This paper presents a practical recipe and empirical analysis for building high-performing multimodal large language models (MLLMs). Through systematic ablations of image encoders, vision–language connectors, and pre-training data mixtures, the work identifies key design lessons: image resolution and the number of image tokens drive the largest gains, followed by encoder capacity and pre-training data; architectural choices for the vision–language connector matter far less. Data-wise, a careful mixture of captioned images, interleaved image–text documents, and some text-only data is critical — caption data boosts zero-shot captioning, interleaved documents enable strong few-shot and text performance, and text-only data preserves language capabilities. The authors apply these lessons to scale MM1: ViT-H image encoders at high resolution feeding 144 visual tokens into decoder-only LLMs (dense and MoE variants) trained on a 45/45/10 mixture (interleaved/caption/text), for ~200k steps (~400B tokens). MM1 models (dense up to 30B, MoE up to effectively tens of billions of parameters) achieve state-of-the-art few-shot pre-training metrics and competitive supervised fine-tuning results across many established multimodal benchmarks, while exhibiting enhanced in-context learning, multi-image reasoning, and few-shot chain-of-thought capabilities. Practical training details (learning-rate scaling, unfreezing the encoder during SFT, high-resolution support via positional interpolation and sub-image decomposition) and the positive impact of synthetic caption data are reported to guide reproducing and extending these findings.\n",
|
||||
"\n",
|
||||
"================\n",
|
||||
"\n",
|
||||
"Here is a concise, abstract‑style summary of the paper:\n",
|
||||
"\n",
|
||||
"- Goal: provide a practical recipe and empirical analysis for building high‑performing multimodal LLMs (MLLMs) and identify which design choices matter most.\n",
|
||||
"- Key findings: image resolution and number of image tokens yield the largest performance gains, followed by vision‑encoder capacity and pretraining data; the specific architecture of the vision–language connector matters far less.\n",
|
||||
"- Data mix: a careful pretraining mixture is critical—captioned images boost zero‑shot captioning, interleaved image–text documents enable strong few‑shot and text performance, and some text‑only data preserves language capabilities. The authors use a 45/45/10 split (interleaved/caption/text).\n",
|
||||
"- MM1 models: applying these lessons, they scale ViT‑H encoders at high resolution producing 144 visual tokens into decoder‑only LLMs (dense up to 30B, MoE variants effectively larger), trained ~200k steps (~400B tokens).\n",
|
||||
"- Results: MM1 achieves state‑of‑the‑art few‑shot pretraining metrics and competitive supervised fine‑tuning across many multimodal benchmarks, with improved in‑context learning, multi‑image reasoning, and few‑shot chain‑of‑thought behavior.\n",
|
||||
"- Practical guidance: reportable tricks include learning‑rate scaling, unfreezing the encoder during SFT, supporting high resolution via positional interpolation and sub‑image decomposition, and the positive impact of synthetic caption data.\n",
|
||||
"\n",
|
||||
"Overall, the paper offers both empirical insights about what drives MLLM performance and a concrete, reproducible recipe (MM1) that attains strong multimodal capabilities.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# note -- this will take a while with local LLMs, since it sends every node in the document to the LLM\n",
|
||||
"resp = agent.chat(\"What is the summary of the paper?\")"
|
||||
"from llama_index.core.agent import ToolCall, ToolCallResult\n",
|
||||
"\n",
|
||||
"handler = agent.run(\n",
|
||||
" \"What is the summary of the paper that you have access to?\", ctx=ctx\n",
|
||||
")\n",
|
||||
"async for ev in handler.stream_events():\n",
|
||||
" if isinstance(ev, ToolCall):\n",
|
||||
" print(f\"Calling tool {ev.tool_name} with args {ev.tool_kwargs}\")\n",
|
||||
" elif isinstance(ev, ToolCallResult):\n",
|
||||
" print(f\"Tool call {ev.tool_name}({ev.tool_kwargs}) returned {ev.tool_output}\")\n",
|
||||
"\n",
|
||||
"print(\"\\n================\\n\")\n",
|
||||
"\n",
|
||||
"resp = await handler\n",
|
||||
"print(resp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -231,57 +249,191 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The summary of the paper highlights the development of Multimodal Large Language Models (MLLMs) by incorporating image-caption, interleaved image-text, and text-only data for pre-training. The research emphasizes factors like the image encoder, resolution, and token count, while de-emphasizing the design of the vision-language connector. The MM1 family of models, scaling up to 30B parameters, shows impressive performance in pre-training metrics and competitive outcomes on various multimodal benchmarks. These models demonstrate capabilities such as in-context learning and multi-image reasoning, aiming to provide valuable insights for creating MLLMs that benefit the research community.\n"
|
||||
"Calling tool query_all_docs with args {'input': 'Describe in detail how the authors evaluate their work: which benchmarks and tasks they use (pretraining metrics, few-shot evaluation, supervised fine-tuning, multimodal benchmarks, in-context learning, chain-of-thought, multi-image reasoning), the metrics reported, baselines compared, and ablation studies conducted. Include mentions of training steps, model sizes, and any special evaluation setups (e.g., positional interpolation, sub-image decomposition, synthetic caption data).'}\n",
|
||||
"Tool call query_all_docs({'input': 'Describe in detail how the authors evaluate their work: which benchmarks and tasks they use (pretraining metrics, few-shot evaluation, supervised fine-tuning, multimodal benchmarks, in-context learning, chain-of-thought, multi-image reasoning), the metrics reported, baselines compared, and ablation studies conducted. Include mentions of training steps, model sizes, and any special evaluation setups (e.g., positional interpolation, sub-image decomposition, synthetic caption data).'}) returned Overview\n",
|
||||
"- Evaluation covers both pre-training (zero-/few-shot) and supervised fine-tuning (SFT) regimes, plus targeted analyses of in-context learning, multi-image reasoning, and chain-of-thought prompting. Evaluations include captioning, VQA, a set of text-only tasks (TextCore), and a wide collection of modern multimodal benchmarks. Results are reported for multiple model scales (dense 3B, 7B, 30B and MoE variants) and compared to several published baselines.\n",
|
||||
"\n",
|
||||
"Pre-training evaluation\n",
|
||||
"- Tasks and benchmarks:\n",
|
||||
" - Image captioning: COCO (Karpathy test), NoCaps (val), TextCaps (val). Captioning use standard caption prompts and reporting.\n",
|
||||
" - Visual question answering / text-in-image tasks: VQAv2 (testdev), TextVQA (val), VizWiz (testdev), GQA, OK-VQA (val).\n",
|
||||
" - A text-only evaluation suite called TextCore (ARC, PIQA, LAMBADA, WinoGrande, HellaSWAG, SciQ, TriviaQA, WebQS) to measure preservation/quality of language capabilities.\n",
|
||||
"- Prompting and generation:\n",
|
||||
" - Captioning prompt: \"{IMAGE} A photo of\" (or equivalent). VQA prompt: \"{IMAGE} Question: {QUESTION} Short answer:\".\n",
|
||||
" - Greedy decoding until EOS or task-specific stop tokens. For captioning the newline is a stop token; for VQA additional stop tokens include \".\", \",\", \"Question\".\n",
|
||||
" - VQA postprocessing follows the same logic used by OpenFlamingo implementations.\n",
|
||||
"- Metrics:\n",
|
||||
" - Captioning: CIDEr (computed via nlg-eval).\n",
|
||||
" - VQA and related QA tasks: task-appropriate accuracy metrics (reported as percentages).\n",
|
||||
" - TextCore: aggregated scores reported to indicate text-only capabilities.\n",
|
||||
" - Pre-training few-shot evaluation reported for 0-shot, 4-shot, and 8-shot settings (4- and 8-shot used as main few-shot points).\n",
|
||||
"- Splits and sampling:\n",
|
||||
" - Few-shot prompts are sampled from training when available, otherwise validation, ensuring the query example is not one of the shots.\n",
|
||||
"- Scale and settings for pre-training evaluation runs:\n",
|
||||
" - Most pre-training evaluations use smaller ablation setups: base ablation LLM = 1.2B (but some encoder ablations use a 2.9B LLM to ensure capacity).\n",
|
||||
" - Final pre-trained models evaluated at 3B, 7B, and 30B (dense) and MoE variants (3B backbone with 64 experts; 7B backbone with 32 experts).\n",
|
||||
"- Baselines for pre-training comparisons:\n",
|
||||
" - Flamingo (various sizes), Emu2 (14B, 37B), IDEFICS (9B, 80B), and other published pre-trained MLLMs where few-shot pre-training numbers are available.\n",
|
||||
"\n",
|
||||
"Supervised fine-tuning (SFT) evaluation\n",
|
||||
"- SFT data and setup:\n",
|
||||
" - SFT mixture contains ≈1.45M examples: GPT-4/GPT-4V-generated instruction-response data (e.g., LLaVA-Conv/Complex, ShareGPT-4V), many academic VL datasets (VQAv2, GQA, OKVQA, A-OKVQA, COCO Captions, OCRVQA, TextCaps, DVQA, ChartQA, AI2D, DocVQA, InfoVQA, SynthDog-En), and a small internal text-only SFT set.\n",
|
||||
" - Fine-tuning: 10k steps, batch size 256, sequence length 2048; optimizer AdaFactor with peak LR 1e-5 and cosine decay to 0. Both image encoder and LLM are unfrozen unless noted in ablations.\n",
|
||||
"- Benchmarks & aggregated evaluation:\n",
|
||||
" - A large set of 12+ multimodal benchmarks is used for SFT evaluation, including VQAv2, TextVQA, ScienceQA-IMG, MMMU, MathVista, MME (perception/cognition splits), MMBench, SEED-Bench, POPE, LLaVA-Bench-in-the-Wild, MM-Vet, etc.\n",
|
||||
" - Results reported per-dataset and combined into a meta-average for comparisons; the meta-average is normalized relative to a compact baseline to make metrics comparable across tasks.\n",
|
||||
"- Baselines and SFT comparisons:\n",
|
||||
" - Compared against a range of SOTA and contemporary multimodal models after instruction tuning: LLaVA variants (1.5/NeXT), InstructBLIP, Qwen-VL, Emu2-Chat, CogVLM, Gemini family, GPT4V where available, and others. Both dense and MoE variants are compared when available.\n",
|
||||
"- High-resolution and multi-image SFT evaluation:\n",
|
||||
" - Two techniques are used to support high-resolution inputs during SFT:\n",
|
||||
" - Positional embedding interpolation to adapt ViT positional embeddings to larger resolutions (used to support 448×448, 560×560, 672×672, etc.).\n",
|
||||
" - Sub-image decomposition (crop-based): for very high resolution (e.g., 1344×1344) the image is split into multiple sub-images (e.g., five 672×672 crops) that are encoded independently and concatenated as a sequence to the LLM.\n",
|
||||
" - Default SFT evaluation results reported at an effective high resolution (1344×1344) via these strategies. Reported improvement with higher resolution (e.g., relative gains up to ~15% average when supporting 1344×1344 vs 336×336).\n",
|
||||
"- Chain-of-thought & few-shot in-context evaluation after SFT:\n",
|
||||
" - MathVista is used to quantify few-shot chain-of-thought capability: example results show 0-shot 39.4, 4-shot 41.9, and an 8-shot mixed-resolution in-context setup achieves 44.4.\n",
|
||||
" - Mixed-resolution in-context strategy: to fit more examples in context while managing token cost of high-resolution sub-image decomposition, some in-context examples are encoded at lower resolution and only the last N examples use full high-resolution decomposition (N=3 in reported experiments).\n",
|
||||
"\n",
|
||||
"Ablation studies and analyses\n",
|
||||
"- Overall ablation design:\n",
|
||||
" - A compact base configuration is used for systematic ablations: ViT-L/14 image encoder (CLIP), C-Abstractor connector with 144 image tokens, pre-training mixture 45% captioned images / 45% interleaved image-text / 10% text-only, and a 1.2B decoder-only LLM for many ablations.\n",
|
||||
" - One component changed at a time; evaluations are zero-/few-shot across the same captioning and VQA benchmarks.\n",
|
||||
"- Image encoder ablations:\n",
|
||||
" - Compared contrastive (CLIP variants trained on DFN-5B, VeCap-300M, OpenAI CLIP) against reconstructive losses (AIM models).\n",
|
||||
" - Resolution ablations: 224 → 336 → 378 px; clear finding that image resolution has the largest impact, followed by encoder capacity and training data composition. Increasing resolution yielded ~3% absolute boost in many metrics.\n",
|
||||
" - Encoder size: ViT-L → ViT-H shows modest gains (typically <1% absolute).\n",
|
||||
" - Training data for encoders: inclusion of synthetic caption data (VeCap) yields non-trivial few-shot improvements.\n",
|
||||
" - Table-based reporting of 0-/4-/8-shot metrics for these variants.\n",
|
||||
"- Vision-language (VL) connector ablations:\n",
|
||||
" - Connector types: average pooling (grid pooling + linear), attention pooling (learnable queries), and C-Abstractor (convolutional mapping / ResNet-based projector).\n",
|
||||
" - Image token counts: experiments with 64 vs 144 image tokens per image.\n",
|
||||
" - Findings: number of visual tokens and image resolution matter most; the particular connector architecture has comparatively little effect on final performance. Detailed 0/4/8-shot tables compare pooling strategies across token counts and resolutions.\n",
|
||||
"- Pre-training data mixture ablations:\n",
|
||||
" - Systematically varied mixes of captioned image pairs vs interleaved image-text documents vs text-only data. Examples tested: 100% caption, mixtures such as 66/33, 50/50, and 0/100, and image/text-only ratios (e.g., 91/9, 86/14, 66/33).\n",
|
||||
" - Key lessons:\n",
|
||||
" - Interleaved documents are critical for few-shot and text-only performance; captioning data strongly lifts zero-shot captioning performance.\n",
|
||||
" - Text-only data helps preserve/boost few-shot and text-only performance; including ~9–14% text-only yields a better balance.\n",
|
||||
" - A final recommended pre-training mix is 45% interleaved / 45% image-caption / 10% text-only to balance zero- and few-shot capabilities.\n",
|
||||
" - Impact of synthetic VeCap captions: even though small (~7% of caption pool), VeCap gives measurable few-shot gains (e.g., 2.4% and 4% absolute in reported settings).\n",
|
||||
"- SFT-specific ablations:\n",
|
||||
" - Repeating data-mixture and connector ablations in the SFT context: caption-pretraining helps SFT zero-shot metrics; choice of VL connector still has limited effect though finer differences appear at high token counts; freezing vs unfreezing the image encoder matters (frozen better at lower resolution; unfrozen better for high-resolution SFT).\n",
|
||||
"- Hyperparameter and optimization ablations:\n",
|
||||
" - Learning-rate grid searches run at small scales (models 9M, 85M, 302M, 1.2B) and 50k-step probes, then a log-linear fit extrapolated to larger model sizes. Grid-search experiments used 50k training steps for each setting.\n",
|
||||
" - Resulting scaling rule and fitted formula for optimal peak learning rate as a function of LLM parameter count is provided and used to choose LRs for the 3B/7B/30B models (e.g., final LRs used: 6e-5 (3B), 4e-5 (7B), 2e-5 (30B)). Weight decay scaled as λ = 0.1 · η.\n",
|
||||
"- MoE (mixture-of-experts) experiments:\n",
|
||||
" - Two MoE designs: 3B-MoE with 64 experts (∼64B total params, top-2 gating, replace every-2 layers) and 7B-MoE with 32 experts (∼47B total params, replace every-4 layers).\n",
|
||||
" - Training used top-2 gating, load-balance loss coefficient 0.01, router z-loss 0.001, and otherwise the same hyperparameters and data mixture as the dense backbones. MoE variants show uniform improvements over dense counterparts on many SFT benchmarks.\n",
|
||||
"- Additional implementation/evaluation notes:\n",
|
||||
" - Pre-training: models trained unfrozen for 200k steps (≈400B tokens) with batch size 512 and sequence length 4096, allowing up to 16 images per sequence and 144 tokens per image (≈1M text tokens + 1M image tokens per batch in the final setup). The pre-training mixture is fixed deterministically for reproducibility.\n",
|
||||
" - Pre-training evaluation prompts, stop tokens, and postprocessing are standardized (greedy decoding), and detailed splits used for each benchmark are specified.\n",
|
||||
" - SFT evaluation meta-average: benchmarks are normalized to a compact baseline configuration prior to averaging so disparate metrics can be compared.\n",
|
||||
" - For high-resolution SFT, the positional interpolation approach (to support larger patches) and the sub-image decomposition scheme (to represent very large images as multiple crops) are both used and evaluated; sub-image decomposition increases the number of image tokens dramatically, which motivates mixed-resolution in-context examples for few-shot prompting.\n",
|
||||
"\n",
|
||||
"Reporting and comparisons\n",
|
||||
"- Tabular reporting:\n",
|
||||
" - Pre-training few-shot results are reported in detailed tables per model scale (3B, 7B, 30B) for 0/4/8/16-shot where applicable, across captioning and VQA datasets.\n",
|
||||
" - SFT comparisons show per-benchmark numbers and a combined meta-average; both dense and MoE model variants are included.\n",
|
||||
"- Baselines and contemporaries cited for direct comparison include Flamingo, IDEFICS, Emu2, LLaVA-NeXT, CogVLM, Gemini family, GPT4V, and many instruction-tuned MLLMs. Where appropriate, notes on differences in prompting setups (e.g., some baselines include text-only demonstrations in “0” prompts) are documented.\n",
|
||||
"- Qualitative analysis:\n",
|
||||
" - A variety of qualitative examples shown for counting, OCR, multi-image reasoning, style following, instruction following, and chain-of-thought reasoning; these accompany quantitative results to illustrate capabilities such as multi-image reasoning and few-shot chain-of-thought.\n",
|
||||
"\n",
|
||||
"Key reported evaluation figures (examples)\n",
|
||||
"- Pre-training duration: 200k steps (~400B tokens).\n",
|
||||
"- Pre-training batch & context: batch 512, sequence length 4096, up to 16 images per sequence, 144 tokens per image.\n",
|
||||
"- SFT: 10k steps; batch 256; seq length 2048; AdaFactor with peak LR 1e-5.\n",
|
||||
"- MoE variants: 3B backbone + 64 experts (∼64B total); 7B backbone + 32 experts (∼47B total); top-2 gating; load-balance and router regularizers used.\n",
|
||||
"- Example few-shot chain-of-thought: MathVista 0-shot 39.4 → 4-shot 41.9 → 8-shot with mixed-resolution 44.4.\n",
|
||||
"\n",
|
||||
"In summary\n",
|
||||
"- Evaluation is multi-faceted: systematic pre-training zero-/few-shot tests on captioning and VQA, text-only TextCore checks, extensive SFT across a broad benchmark suite, ablations covering image encoder, VL connector, data mixtures, training hyperparameters, and input-resolution strategies, plus experiments with MoE scaling. Metrics include CIDEr for captioning, accuracy for VQA and other benchmarks, TextCore aggregated scores, and a normalized meta-average for SFT. The authors report results across multiple model sizes and variants and compare to a broad set of recent multimodal models.\n",
|
||||
"\n",
|
||||
"================\n",
|
||||
"\n",
|
||||
"Short answer: the authors evaluate across (1) pre-training zero-/few-shot benchmarks (captioning, VQA, and a text-only suite), (2) supervised instruction fine‑tuning (SFT) on a large multimodal mixture with extensive downstream benchmarks, and (3) targeted analyses (in‑context/few‑shot learning, chain‑of‑thought, multi‑image reasoning). They report standard task metrics (CIDEr for captioning, accuracy for VQA/QA, aggregated TextCore scores, and a normalized SFT meta‑average), compare to many recent MLLMs, and run systematic ablations (encoder, connector, data mixtures, hyperparameters, resolution/tokenization, MoE). Key training/eval settings and special setups are also evaluated (positional interpolation, sub‑image decomposition, synthetic caption data). Details:\n",
|
||||
"\n",
|
||||
"1) Pre‑training evaluation\n",
|
||||
"- Tasks and datasets:\n",
|
||||
" - Image captioning: COCO (Karpathy test), NoCaps (val), TextCaps (val).\n",
|
||||
" - VQA/text‑in‑image: VQAv2 (testdev), TextVQA, VizWiz, GQA, OK‑VQA, etc.\n",
|
||||
" - TextCore: a text‑only suite (ARC, PIQA, LAMBADA, WinoGrande, HellaSWAG, SciQ, TriviaQA, WebQS) to check language preservation.\n",
|
||||
"- Prompting & decoding:\n",
|
||||
" - Zero/4/8 (and sometimes 16) shot prompts; few‑shot examples sampled from train/val ensuring no leakage.\n",
|
||||
" - Greedy decoding with task‑specific stop tokens; VQA postprocessing matches Flamingo style.\n",
|
||||
"- Metrics:\n",
|
||||
" - CIDEr for captioning, accuracy (%) for VQA/QA tasks, aggregated TextCore scores for language capability.\n",
|
||||
"- Model scales for evaluation:\n",
|
||||
" - Ablations often use a small base LLM (1.2B, sometimes 2.9B). Final pre‑trained models evaluated at 3B, 7B, 30B (dense) and MoE variants.\n",
|
||||
"- Baselines:\n",
|
||||
" - Compared against Flamingo, Emu2, IDEFICS, and other published pre‑trained MLLMs when few‑shot pretraining numbers are available.\n",
|
||||
"\n",
|
||||
"2) Supervised fine‑tuning (SFT) evaluation\n",
|
||||
"- SFT data:\n",
|
||||
" - ≈1.45M instruction examples: GPT‑4/GPT‑4V synthetic instruction data (LLaVA‑Conv/Complex, ShareGPT‑4V), many academic VL datasets (VQAv2, GQA, OKVQA, COCO Captions, TextCaps, OCRVQA, ChartQA, DocVQA, etc.), and a small internal text SFT set.\n",
|
||||
"- Fine‑tuning procedure:\n",
|
||||
" - 10k steps, batch 256, seq length 2048, AdaFactor optimizer, peak LR 1e‑5 with cosine decay. Image encoder and LLM unfrozen unless ablated.\n",
|
||||
"- Downstream benchmarks and reporting:\n",
|
||||
" - 12+ multimodal benchmarks for SFT evaluation (VQAv2, TextVQA, ScienceQA‑IMG, MMMU, MathVista, MME, MMBench, SEED‑Bench, POPE, LLaVA‑BiW, MM‑Vet, etc.). Results reported per dataset and combined into a normalized meta‑average for fair aggregation across heterogeneous metrics.\n",
|
||||
"- Baselines:\n",
|
||||
" - Compared to instruction‑tuned contemporaries: LLaVA/NeXT, InstructBLIP, Qwen‑VL, Emu2‑Chat, CogVLM, Gemini family, GPT4V where available.\n",
|
||||
"\n",
|
||||
"3) Targeted analyses (in‑context learning, CoT, multi‑image)\n",
|
||||
"- In‑context/few‑shot: standard 0/4/8‑shot probes across captioning and VQA.\n",
|
||||
"- Chain‑of‑thought: MathVista used to quantify few‑shot CoT; reported example: 0‑shot 39.4 → 4‑shot 41.9 → 8‑shot mixed‑resolution 44.4.\n",
|
||||
"- Multi‑image reasoning: evaluated qualitatively and quantitatively on multi‑image benchmarks and examples.\n",
|
||||
"\n",
|
||||
"4) Ablation studies (systematic and extensive)\n",
|
||||
"- Image encoder ablations:\n",
|
||||
" - Contrastive (CLIP variants) vs reconstructive (AIM); encoder size (ViT‑L → ViT‑H); encoder training data (including synthetic caption data VeCap).\n",
|
||||
" - Resolution ablations (e.g., 224 → 336 → 378 px): resolution and number of visual tokens give the largest gains.\n",
|
||||
"- Vision–language connector ablations:\n",
|
||||
" - Connector types (avg‑pooling, attention pooling, C‑Abstractor) and visual token counts (e.g., 64 vs 144). Finding: connector architecture matters far less than token count/resolution.\n",
|
||||
"- Pre‑training data mixture ablations:\n",
|
||||
" - Varied mixes of caption pairs / interleaved image–text documents / text‑only. Key finding: 45% interleaved / 45% caption / 10% text gives the best balance (interleaved documents help few‑shot/text performance; captions boost zero‑shot captioning; text-only preserves language capabilities).\n",
|
||||
" - Small synthetic caption pool (VeCap) provides measurable few‑shot gains.\n",
|
||||
"- SFT ablations:\n",
|
||||
" - Freezing vs unfreezing image encoder in SFT (unfreeze better for high‑resolution), data‑mix effects in SFT, connector behavior at high token counts.\n",
|
||||
"- Hyperparameter & optimizer ablations:\n",
|
||||
" - LR grid searches at small scales (9M → 1.2B) with 50k‑step probes and a fitted scaling rule; final LRs chosen (e.g., ~6e‑5 for 3B, 4e‑5 for 7B, 2e‑5 for 30B for pretraining). Weight decay scaled proportionally.\n",
|
||||
"- MoE experiments:\n",
|
||||
" - Two MoE setups: 3B backbone + 64 experts (~64B params) and 7B + 32 experts (~47B params), top‑2 gating, load‑balance/reg losses; MoE variants yield uniform improvements on many SFT benchmarks.\n",
|
||||
"\n",
|
||||
"5) Special evaluation/training setups and numbers\n",
|
||||
"- Pretraining infrastructure & settings:\n",
|
||||
" - Pretraining: ≈200k steps (~400B tokens), batch 512, seq length 4096, allow up to 16 images per sequence, 144 tokens per image in final setup. Pretraining mixture fixed deterministically.\n",
|
||||
"- High‑resolution support:\n",
|
||||
" - Positional embedding interpolation to adapt ViT positional embeddings to larger resolutions.\n",
|
||||
" - Sub‑image decomposition (split very large images into multiple crops, encode independently, and concatenate visual tokens) to support extremely high effective resolution (e.g., 1344×1344 as five 672×672 crops).\n",
|
||||
" - Mixed‑resolution in‑context strategy to keep context capacity reasonable while enabling high‑resolution targets in the last few shots.\n",
|
||||
"- Decoding/postprocessing:\n",
|
||||
" - Greedy decoding; task‑specific stops; standardized postprocessing to align with prior work.\n",
|
||||
"- Reporting conventions:\n",
|
||||
" - 0/4/8‑shot pretraining tables, SFT per‑dataset numbers and a normalized meta‑average, and qualitative examples (counting, OCR, style following, multi‑image reasoning, CoT).\n",
|
||||
"\n",
|
||||
"6) Qualitative analysis\n",
|
||||
"- Numerous qualitative examples illustrating multi‑image reasoning, counting, OCR, instruction following, and chain‑of‑thought behaviors accompany the quantitative results.\n",
|
||||
"\n",
|
||||
"In short: the evaluation is broad (pretraining few‑shot, SFT, targeted capability probes), quantitatively rigorous (CIDEr/accuracy/meta‑averages), compares to many contemporary MLLMs, and is supported by wide ablations (encoder, connector, data, optimization, resolution, MoE) and practical high‑resolution evaluation techniques (positional interpolation, sub‑image decomposition, mixed‑resolution in‑context).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(resp))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Added user message to memory: How do the authors evaluate their work?\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: search with args: {\"input\":\"evaluation methods\"}\n",
|
||||
"Got output: The evaluation methods involve synthesizing all benchmark results into a single meta-average number to simplify comparisons. This is achieved by normalizing the evaluation metrics with respect to a baseline configuration, standardizing the results for each task, adjusting every metric by dividing it by its respective baseline, and then averaging across all metrics.\n",
|
||||
"========================\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"resp = agent.chat(\"How do the authors evaluate their work?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The authors evaluate their work by synthesizing all benchmark results into a single meta-average number to simplify comparisons. They normalize the evaluation metrics with respect to a baseline configuration, standardize the results for each task, adjust every metric by dividing it by its respective baseline, and then average across all metrics for evaluation.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(resp))"
|
||||
"handler = agent.run(\"How do the authors evaluate their work?\", ctx=ctx)\n",
|
||||
"async for ev in handler.stream_events():\n",
|
||||
" if isinstance(ev, ToolCall):\n",
|
||||
" print(f\"Calling tool {ev.tool_name} with args {ev.tool_kwargs}\")\n",
|
||||
" elif isinstance(ev, ToolCallResult):\n",
|
||||
" print(f\"Tool call {ev.tool_name}({ev.tool_kwargs}) returned {ev.tool_output}\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"\\n================\\n\")\n",
|
||||
"\n",
|
||||
"resp = await handler\n",
|
||||
"print(resp)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-aNC435Vv-py3.10",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
Binary file not shown.
@@ -11,9 +11,10 @@
|
||||
"\n",
|
||||
"This example shows off LlamaParse parsing capabilities to build a functioning query pipeline over the Caltrain weekend schedule, a big timetable containing all trains northbound and southbound and their stops in various cities.\n",
|
||||
"\n",
|
||||
"Naive parsing solutions fail to preserve this tabular layout, leading to LLM hallucinations. In contrast, LlamaParse text mode lays out the table spatially in a neat format, enabling more sophisticated LLMs like gpt-4-turbo to understand the spacing and reason over all the numbers.\n",
|
||||
"\n",
|
||||
"**NOTE**: LlamaParse markdown mode doesn't quite work yet - it's in development!"
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -26,18 +27,6 @@
|
||||
"Download the data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e6ae2e38-30c9-4865-aa13-47780bc3848f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -55,7 +44,7 @@
|
||||
"source": [
|
||||
"## Initialize LlamaParse\n",
|
||||
"\n",
|
||||
"Initialize LlamaParse in `text` mode which will represent complex documents incl. text, tables, and figures as nicely formatted text."
|
||||
"Parse the text results from `LlamaParse`, which will represent complex documents, including text, tables, and figures, as nicely formatted text."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -64,26 +53,29 @@
|
||||
"id": "54aa9579-84d4-49bc-ab54-5474e69c1188",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/jerryliu/Programming/llama_parse/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 5f73353a-1f4b-480d-9eea-58d1d22b75f6\n"
|
||||
"Started parsing the file under job_id d162724f-dcb9-4bfe-9bd4-337244906fb8\n",
|
||||
".."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"docs = LlamaParse(result_type=\"text\").load_data(\"./caltrain_schedule_weekend.pdf\")"
|
||||
"result = await LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
").aparse(\"./caltrain_schedule_weekend.pdf\")\n",
|
||||
"\n",
|
||||
"documents = result.get_text_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -104,73 +96,44 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ZONE 2ZONE 3ZONE 4ZONE 4 ZONE 3ZONE 2ZONE 1ZONE 1\n",
|
||||
" Printer-Friendly Caltrain Schedule\n",
|
||||
" Northbound – WEEKEND SERVICE to SAN FRANCISCO 2XX Local\n",
|
||||
" Printer Friendly WEEKEND Caltrain Schedule\n",
|
||||
" Morning to Early Afternoon Page 1 of 2\n",
|
||||
" Northbound – WEEKEND SERVICE to SAN FRANCISCO 6XX Local\n",
|
||||
" Train No. 601 603 605 607 609 611 613 615 617 619 621 623 625 627 629 631\n",
|
||||
" Tamien 6:51a 7:51a 8:51a 9:51a 10:51a 11:51a 12:51p 1:51p\n",
|
||||
" San Jose Diridon 6:56a 7:26a 7:56a 8:26a 8:56a 9:26a 9:56a 10:26a 10:56a 11:26a 11:56a 12:26p 12:56p 1:26p 1:56p 2:26p\n",
|
||||
" Santa Clara 7:03a 7:33a 8:03a 8:33a 9:03a 9:33a 10:03a 10:33a 11:03a 11:33a 12:03p 12:33p 1:03p 1:33p 2:03p 2:33p\n",
|
||||
"ZONE 4 Lawrence 7:08a 7:38a 8:08a 8:38a 9:08a 9:38a 10:08a 10:38a 11:08a 11:38a 12:08p 12:38p 1:08p 1:38p 2:08p 2:38p\n",
|
||||
"\n",
|
||||
" Sunnyvale 7:12a 7:42a 8:12a 8:42a 9:12a 9:42a 10:12a 10:42a 11:12a 11:42a 12:12p 12:42p 1:12p 1:42p 2:12p 2:42p\n",
|
||||
" Mountain View 7:16a 7:46a 8:16a 8:46a 9:16a 9:46a 10:16a 10:46a 11:16a 11:46a 12:16p 12:46p 1:16p 1:46p 2:16p 2:46p\n",
|
||||
" San Antonio 7:19a 7:49a 8:19a 8:49a 9:19a 9:49a 10:19a 10:49a 11:19a 11:49a 12:19p 12:49p 1:19p 1:49p 2:19p 2:49p\n",
|
||||
" California Ave 7:22a 7:52a 8:22a 8:52a 9:22a 9:52a 10:22a 10:52a 11:22a 11:52a 12:22p 12:52p 1:22p 1:52p 2:22p 2:52p\n",
|
||||
" Palo Alto 7:25a 7:55a 8:25a 8:55a 9:25a 9:55a 10:25a 10:55a 11:25a 11:55a 12:25p 12:55p 1:25p 1:55p 2:25p 2:55p\n",
|
||||
"ZONE 3 Menlo Park 7:27a 7:57a 8:27a 8:57a 9:27a 9:57a 10:27a 10:57a 11:27a 11:57a 12:27p 12:57p 1:27p 1:57p 2:27p 2:57p\n",
|
||||
"\n",
|
||||
" Train No. 221 225 229 233 237 241 245 249 253 257 261 265 269 273 *277 *281\n",
|
||||
" Service Types L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2\n",
|
||||
" Tamien 7:12a 9:05a 10:05a 11:05a 1:05p 3:05p 5:05p 7:05p 9:05p 11:05p\n",
|
||||
" San Jose Diridon 7:19a 9:12a 10:12a 11:12a 12:12p 1:12p 2:12p 3:12p 4:12p 5:12p 6:12p 7:12p 8:12p 9:12p 10:19p 11:12p\n",
|
||||
" Santa Clara 7:25a 9:18a 10:18a 11:18a 12:18p 1:18p 2:18p 3:18p 4:18p 5:18p 6:18p 7:18p 8:18p 9:18p 10:25p 11:18p\n",
|
||||
" Lawrence 7:31a 9:24a 10:24a 11:24a 12:24p 1:24p 2:24p 3:24p 4:24p 5:24p 6:24p 7:24p 8:24p 9:24p 10:31p 11:24p\n",
|
||||
" Sunnyvale 7:35a 9:28a 10:28a 11:28a 12:28p 1:28p 2:28p 3:28p 4:28p 5:28p 6:28p 7:28p 8:28p 9:28p 10:35p 11:28p\n",
|
||||
" Mountain View 7:40a 9:34a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:40p 11:34p\n",
|
||||
" San Antonio 7:43a 9:37a 10:37a 11:37a 12:37p 1:37p 2:37p 3:37p 4:37p 5:37p 6:37p 7:37p 8:37p 9:37p 10:44p 11:37p\n",
|
||||
" California Ave 7:48a 9:42a 10:42a 11:42a 12:42p 1:42p 2:42p 3:42p 4:42p 5:42p 6:42p 7:42p 8:42p 9:42p 10:48p 11:42p\n",
|
||||
" Palo Alto 7:52a 9:46a 10:46a 11:46a 12:46p 1:46p 2:46p 3:46p 4:46p 5:46p 6:46p 7:46p 8:46p 9:46p 10:53p 11:46p\n",
|
||||
" Menlo Park 7:55a 9:50a 10:50a 11:50a 12:50p 1:50p 2:50p 3:50p 4:50p 5:50p 6:50p 7:50p 8:50p 9:50p 10:56p 11:50p\n",
|
||||
" Redwood City 8:01a 9:56a 10:56a 11:56a 12:56p 1:56p 2:56p 3:56p 4:56p 5:56p 6:56p 7:56p 8:56p 9:56p 11:02p 11:56p\n",
|
||||
" San Carlos 8:05a 10:01a 11:01a 12:01p 1:01p 2:01p 3:01p 4:01p 5:01p 6:01p 7:01p 8:01p 9:01p 10:01p 11:07p 12:01a\n",
|
||||
" Belmont 8:09a 10:04a 11:04a 12:04p 1:04p 2:04p 3:04p 4:04p 5:04p 6:04p 7:04p 8:04p 9:04p 10:04p 11:10p 12:04a\n",
|
||||
" Hillsdale 8:12a 10:08a 11:08a 12:08p 1:08p 2:08p 3:08p 4:08p 5:08p 6:08p 7:08p 8:08p 9:08p 10:08p 11:14p 12:08a\n",
|
||||
" Hayward Park 8:15a 10:11a 11:11a 12:11p 1:11p 2:11p 3:11p 4:11p 5:11p 6:11p 7:11p 8:11p 9:11p 10:11p 11:17p 12:11a\n",
|
||||
" San Mateo 8:19a 10:15a 11:15a 12:15p 1:15p 2:15p 3:15p 4:15p 5:15p 6:15p 7:15p 8:15p 9:15p 10:15p 11:21p 12:15a\n",
|
||||
" Burlingame 8:22a 10:19a 11:19a 12:19p 1:19p 2:19p 3:19p 4:19p 5:19p 6:19p 7:19p 8:19p 9:19p 10:19p 11:25p 12:19a\n",
|
||||
" Broadway 8:25a 10:22a 11:22a 12:22p 1:22p 2:22p 3:22p 4:22p 5:22p 6:22p 7:22p 8:22p 9:22p 10:22p 11:28p 12:22a\n",
|
||||
" Millbrae 8:29a 10:26a 11:26a 12:26p 1:26p 2:26p 3:26p 4:26p 5:26p 6:26p 7:26p 8:26p 9:26p 10:26p 11:32p 12:26a\n",
|
||||
" San Bruno 8:34a 10:30a 11:30a 12:30p 1:30p 2:30p 3:30p 4:30p 5:30p 6:30p 7:30p 8:30p 9:30p 10:30p 11:37p 12:30a\n",
|
||||
" S. San Francisco 8:38a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:34p 11:41p 12:34a\n",
|
||||
" Bayshore 8:44a 10:41a 11:41a 12:41p 1:41p 2:41p 3:41p 4:41p 5:41p 6:41p 7:41p 8:41p 9:41p 10:41p 11:47p 12:41a\n",
|
||||
" 22 ndStreet 8:50a 10:46a 11:46a 12:46p 1:46p 2:46p 3:46p 4:46p 5:46p 6:46p 7:46p 8:46p 9:46p 10:46p 11:53p 12:46a\n",
|
||||
" San Francisco 8:56a 10:52a 11:53a 12:53p 1:52p 2:52p 3:52p 4:52p 5:52p 6:52p 7:52p 8:52p 9:52p 10:52p 11:59p 12:52a\n",
|
||||
" *On SAP Center event days, Train 277 or Train 281departure from San Jose Diridon station may be delayed and will depart no later than 10:30p or 11:30p respectively.\n",
|
||||
" Redwood City 7:32a 8:02a 8:32a 9:02a 9:32a 10:02a 10:32a 11:02a 11:32a 12:02p 12:32p 1:02p 1:32p 2:02p 2:32p 3:02p\n",
|
||||
" San Carlos 7:35a 8:05a 8:35a 9:05a 9:35a 10:05a 10:35a 11:05a 11:35a 12:05p 12:35p 1:05p 1:35p 2:05p 2:35p 3:05p\n",
|
||||
" Belmont 7:38a 8:08a 8:38a 9:08a 9:38a 10:08a 10:38a 11:08a 11:38a 12:08p 12:38p 1:08p 1:38p 2:08p 2:38p 3:08p\n",
|
||||
" Hillsdale 7:41a 8:11a 8:41a 9:11a 9:41a 10:11a 10:41a 11:11a 11:41a 12:11p 12:41p 1:11p 1:41p 2:11p 2:41p 3:11p\n",
|
||||
" Hayward Park 7:43a 8:13a 8:43a 9:13a 9:43a 10:13a 10:43a 11:13a 11:43a 12:13p 12:43p 1:13p 1:43p 2:13p 2:43p 3:13p\n",
|
||||
" San Mateo 7:46a 8:16a 8:46a 9:16a 9:46a 10:16a 10:46a 11:16a 11:46a 12:16p 12:46p 1:16p 1:46p 2:16p 2:46p 3:16p\n",
|
||||
" Burlingame 7:48a 8:18a 8:48a 9:18a 9:48a 10:18a 10:48a 11:18a 11:48a 12:18p 12:48p 1:18p 1:48p 2:18p 2:48p 3:18p\n",
|
||||
" Broadway 7:51a 8:21a 8:51a 9:21a 9:51a 10:21a 10:51a 11:21a 11:51a 12:21p 12:51p 1:21p 1:51p 2:21p 2:51p 3:21p\n",
|
||||
"ZONE 2 Millbrae 7:54a 8:24a 8:54a 9:24a 9:54a 10:24a 10:54a 11:24a 11:54a 12:24p 12:54p 1:24p 1:54p 2:24p 2:54p 3:24p\n",
|
||||
"\n",
|
||||
" San Bruno 7:57a 8:27a 8:57a 9:27a 9:57a 10:27a 10:57a 11:27a 11:57a 12:27p 12:57p 1:27p 1:57p 2:27p 2:57p 3:27p\n",
|
||||
" S. San Francisco 8:00a 8:30a 9:00a 9:30a 10:00a 10:30a 11:00a 11:30a 12:00p 12:30p 1:00p 1:30p 2:00p 2:30p 3:00p 3:30p\n",
|
||||
" Bayshore 8:05a 8:35a 9:05a 9:35a 10:05a 10:35a 11:05a 11:35a 12:05p 12:35p 1:05p 1:35p 2:05p 2:35p 3:05p 3:35p\n",
|
||||
" 22ⁿᵈ Street 8:10a 8:40a 9:10a 9:40a 10:10a 10:40a 11:10a 11:40a 12:10p 12:40p 1:10p 1:40p 2:10p 2:40p 3:10p 3:40p\n",
|
||||
"ZONE 1 San Francisco 8:15a 8:45a 9:15a 9:45a 10:15a 10:45a 11:15a 11:45a 12:15p 12:45p 1:15p 1:45p 2:15p 2:45p 3:15p 3:45p\n",
|
||||
"\n",
|
||||
" Southbound – WEEKEND SERVICE to SAN JOSE 2XX Local\n",
|
||||
" Train No. 224 228 232 236 240 244 248 252 256 260 264 268 272 276 280 284\n",
|
||||
" Service Types L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2\n",
|
||||
" San Francisco 8:28a 9:58a 10:58a 11:58a 12:58p 1:58p 2:58p 3:58p 4:58p 5:58p 6:58p 7:58p 8:58p 9:58p 10:58p 12:05a\n",
|
||||
" 22 ndStreet 8:33a 10:03a 11:03a 12:03p 1:03p 2:03p 3:03p 4:03p 5:03p 6:03p 7:03p 8:03p 9:03p 10:03p 11:03p 12:10a\n",
|
||||
" Bayshore 8:38a 10:08a 11:08a 12:08p 1:08p 2:08p 3:08p 4:08p 5:08p 6:08p 7:08p 8:08p 9:08p 10:08p 11:08p 12:15a\n",
|
||||
" S. San Francisco 8:45a 10:15a 11:15a 12:15p 1:15p 2:15p 3:15p 4:15p 5:15p 6:15p 7:15p 8:15p 9:15p 10:15p 11:15p 12:22a\n",
|
||||
" San Bruno 8:49a 10:19a 11:19a 12:19p 1:19p 2:19p 3:19p 4:19p 5:19p 6:19p 7:19p 8:19p 9:19p 10:19p 11:19p 12:26a\n",
|
||||
" Millbrae 8:53a 10:24a 11:24a 12:24p 1:24p 2:24p 3:24p 4:24p 5:24p 6:24p 7:24p 8:24p 9:24p 10:24p 11:24p 12:31a\n",
|
||||
" Broadway 8:57a 10:27a 11:27a 12:27p 1:27p 2:27p 3:27p 4:27p 5:27p 6:27p 7:27p 8:27p 9:27p 10:27p 11:27p 12:35a\n",
|
||||
" Burlingame 9:00a 10:31a 11:31a 12:31p 1:31p 2:31p 3:31p 4:31p 5:31p 6:31p 7:31p 8:31p 9:31p 10:31p 11:31p 12:38a\n",
|
||||
" San Mateo 9:04a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:34p 11:34p 12:41a\n",
|
||||
" Hayward Park 9:07a 10:37a 11:37a 12:37p 1:37p 2:37p 3:37p 4:37p 5:37p 6:37p 7:37p 8:37p 9:37p 10:37p 11:37p 12:45a\n",
|
||||
" Hillsdale 9:10a 10:41a 11:41a 12:41p 1:41p 2:41p 3:41p 4:41p 5:41p 6:41p 7:41p 8:41p 9:41p 10:41p 11:41p 12:48a\n",
|
||||
" Belmont 9:14a 10:44a 11:44a 12:44p 1:44p 2:44p 3:44p 4:44p 5:44p 6:44p 7:44p 8:44p 9:44p 10:44p 11:44p 12:52a\n",
|
||||
" San Carlos 9:17a 10:48a 11:48a 12:48p 1:48p 2:48p 3:48p 4:48p 5:48p 6:48p 7:48p 8:48p 9:48p 10:48p 11:48p 12:55a\n",
|
||||
" Redwood City 9:21a 10:52a 11:52a 12:52p 1:52p 2:52p 3:52p 4:52p 5:52p 6:52p 7:52p 8:52p 9:52p 10:52p 11:52p 12:59a\n",
|
||||
" Menlo Park 9:28a 10:58a 11:58a 12:58p 1:58p 2:58p 3:58p 4:58p 5:58p 6:58p 7:58p 8:58p 9:58p 10:58p 11:58p 1:05a\n",
|
||||
" Palo Alto 9:32a 11:02a 12:02p 1:02p 2:02p 3:02p 4:02p 5:02p 6:02p 7:02p 8:02p 9:02p 10:02p 11:02p 12:02a 1:09a\n",
|
||||
" California Avenue 9:36a 11:06a 12:06p 1:06p 2:06p 3:06p 4:06p 5:06p 6:06p 7:06p 8:06p 9:06p 10:06p 11:06p 12:06a 1:12a\n",
|
||||
" San Antonio 9:41a 11:11a 12:11p 1:11p 2:11p 3:11p 4:11p 5:11p 6:11p 7:11p 8:11p 9:11p 10:11p 11:11p 12:10a 1:17a\n",
|
||||
" Mountain View 9:45a 11:16a 12:16p 1:16p 2:16p 3:16p 4:16p 5:16p 6:16p 7:16p 8:16p 9:16p 10:16p 11:16p 12:15a 1:21a\n",
|
||||
" Sunnyvale 9:51a 11:21a 12:21p 1:21p 2:21p 3:21p 4:21p 5:21p 6:21p 7:21p 8:21p 9:21p 10:21p 11:21p 12:20a 1:26a\n",
|
||||
" Lawrence 9:55a 11:26a 12:26p 1:26p 2:26p 3:26p 4:26p 5:26p 6:26p 7:26p 8:26p 9:26p 10:26p 11:26p 12:25a 1:31a\n",
|
||||
" Santa Clara 10:01a 11:32a 12:32p 1:32p 2:32p 3:32p 4:32p 5:32p 6:32p 7:32p 8:32p 9:32p 10:32p 11:32p 12:31a 1:37a\n",
|
||||
" San Jose Diridon 10:10a 11:40a 12:40p 1:38p 2:40p 3:38p 4:40p 5:38p 6:40p 7:38p 8:40p 9:38p 10:40p 11:38p 12:39a 1:44a\n",
|
||||
" Tamien 10:15a 11:45a 12:45p 2:45p 4:45p 6:45p 8:45p 10:45p 12:44a 1:49a\n",
|
||||
" EFFECTIVE September 12, 2022 Timetable subject to change without notice.\n"
|
||||
"EFFECTIVE September 21, 2024 Timetable subject to change without notice See Page 2 For Afternoon and Evening Times\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].get_content())"
|
||||
"print(documents[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,9 +143,7 @@
|
||||
"source": [
|
||||
"## Initialize Query Engine\n",
|
||||
"\n",
|
||||
"We now initialize a query engine over this data. Here we use a baseline summary index, which doesn't do vector indexing/chunking and instead dumps the entire text into the prompt.\n",
|
||||
"\n",
|
||||
"We see that the LLM (gpt-4-turbo) is able to provide all the stops for train no 225 northbound."
|
||||
"We now initialize a query engine over this data. Here we use a baseline summary index, which doesn't do vector indexing/chunking and instead dumps the entire text into the prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,8 +156,8 @@
|
||||
"from llama_index.core import SummaryIndex\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-4o\")\n",
|
||||
"index = SummaryIndex.from_documents(docs)\n",
|
||||
"llm = OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
"index = SummaryIndex.from_documents(documents)\n",
|
||||
"query_engine = index.as_query_engine(llm=llm)"
|
||||
]
|
||||
},
|
||||
@@ -208,7 +169,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = query_engine.query(\n",
|
||||
" \"What are the stops (and times) for train no 237 northbound?\"\n",
|
||||
" \"What are the stops (and times) for train no 609 northbound?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -222,31 +183,32 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The stops and times for train no. 237 northbound are as follows:\n",
|
||||
"Train No. 609 northbound (stops and times):\n",
|
||||
"\n",
|
||||
"- San Jose Diridon: 12:12 PM\n",
|
||||
"- Santa Clara: 12:18 PM\n",
|
||||
"- Lawrence: 12:24 PM\n",
|
||||
"- Sunnyvale: 12:28 PM\n",
|
||||
"- Mountain View: 12:34 PM\n",
|
||||
"- San Antonio: 12:37 PM\n",
|
||||
"- California Ave: 12:42 PM\n",
|
||||
"- Palo Alto: 12:46 PM\n",
|
||||
"- Menlo Park: 12:50 PM\n",
|
||||
"- Redwood City: 12:56 PM\n",
|
||||
"- San Carlos: 1:01 PM\n",
|
||||
"- Belmont: 1:04 PM\n",
|
||||
"- Hillsdale: 1:08 PM\n",
|
||||
"- Hayward Park: 1:11 PM\n",
|
||||
"- San Mateo: 1:15 PM\n",
|
||||
"- Burlingame: 1:19 PM\n",
|
||||
"- Broadway: 1:22 PM\n",
|
||||
"- Millbrae: 1:26 PM\n",
|
||||
"- San Bruno: 1:30 PM\n",
|
||||
"- S. San Francisco: 1:34 PM\n",
|
||||
"- Bayshore: 1:41 PM\n",
|
||||
"- 22nd Street: 1:46 PM\n",
|
||||
"- San Francisco: 1:52 PM\n"
|
||||
"- Tamien — 8:51a\n",
|
||||
"- San Jose Diridon — 8:56a\n",
|
||||
"- Santa Clara — 9:03a\n",
|
||||
"- Lawrence — 9:08a\n",
|
||||
"- Sunnyvale — 9:12a\n",
|
||||
"- Mountain View — 9:16a\n",
|
||||
"- San Antonio — 9:19a\n",
|
||||
"- California Ave — 9:22a\n",
|
||||
"- Palo Alto — 9:25a\n",
|
||||
"- Menlo Park — 9:27a\n",
|
||||
"- Redwood City — 9:32a\n",
|
||||
"- San Carlos — 9:35a\n",
|
||||
"- Belmont — 9:38a\n",
|
||||
"- Hillsdale — 9:41a\n",
|
||||
"- Hayward Park — 9:43a\n",
|
||||
"- San Mateo — 9:46a\n",
|
||||
"- Burlingame — 9:48a\n",
|
||||
"- Broadway — 9:51a\n",
|
||||
"- Millbrae — 9:54a\n",
|
||||
"- San Bruno — 9:57a\n",
|
||||
"- S. San Francisco — 10:00a\n",
|
||||
"- Bayshore — 10:05a\n",
|
||||
"- 22nd Street — 10:10a\n",
|
||||
"- San Francisco — 10:15a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -262,18 +224,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = query_engine.query(\n",
|
||||
" \"What are all the trains (and times) that end at Tamien going Southbound?\"\n",
|
||||
" \"What are all the trains (and times) that end at Redwood City going Southbound?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6cf9fce0-5067-48f6-a7ef-62aa9e2edc3d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It gets most of the answers correct (to be fair it misses two trains)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -284,233 +238,20 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The trains that end at Tamien going Southbound are:\n",
|
||||
"\n",
|
||||
"- Train 224 at 10:15a\n",
|
||||
"- Train 228 at 11:45a\n",
|
||||
"- Train 240 at 2:45p\n",
|
||||
"- Train 248 at 4:45p\n",
|
||||
"- Train 256 at 6:45p\n",
|
||||
"- Train 264 at 8:45p\n",
|
||||
"- Train 272 at 10:45p\n",
|
||||
"- Train 284 at 1:49a\n"
|
||||
"None. On this weekend schedule no southbound trains terminate at Redwood City — every listed southbound train continues beyond Redwood City to later stations (Menlo Park/Palo Alto and onward).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e51e7feb-b74f-4101-8963-933ac7ec9763",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Try Baseline\n",
|
||||
"\n",
|
||||
"In contrast, we try a baseline approach with the default PDF reader (PyPDF) in `SimpleDirectoryReader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "364e5155-cc75-4302-a754-9444ae28e6b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core import SimpleDirectoryReader\n",
|
||||
"from llama_index.core import SummaryIndex\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-4o\")\n",
|
||||
"input_file = \"caltrain_schedule_weekend.pdf\"\n",
|
||||
"reader = SimpleDirectoryReader(input_files=[input_file])\n",
|
||||
"base_docs = reader.load_data()\n",
|
||||
"index = SummaryIndex.from_documents(base_docs)\n",
|
||||
"base_query_engine = index.as_query_engine(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4011389-2d27-4a1a-bf8d-7309da28ab15",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Southbound – WEEKEND SERVICE to SAN JOSE\n",
|
||||
"Train No. 224 228 232 236 240 244 248 252 256 260 264 268 272 276 280 284\n",
|
||||
"Service Types L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2\n",
|
||||
"San Francisco 8:28a 9:58a 10:58a 11:58a 12:58p 1:58p 2:58p 3:58p 4:58p 5:58p 6:58p 7:58p 8:58p 9:58p 10:58p 12:05a\n",
|
||||
"22nd Street 8:33a 10:03a 11:03a 12:03p 1:03p 2:03p 3:03p 4:03p 5:03p 6:03p 7:03p 8:03p 9:03p 10:03p 11:03p 12:10a\n",
|
||||
"Bayshore 8:38a 10:08a 11:08a 12:08p 1:08p 2:08p 3:08p 4:08p 5:08p 6:08p 7:08p 8:08p 9:08p 10:08p 11:08p 12:15a\n",
|
||||
"S. San Francisco 8:45a 10:15a 11:15a 12:15p 1:15p 2:15p 3:15p 4:15p 5:15p 6:15p 7:15p 8:15p 9:15p 10:15p 11:15p 12:22a\n",
|
||||
"San Bruno 8:49a 10:19a 11:19a 12:19p 1:19p 2:19p 3:19p 4:19p 5:19p 6:19p 7:19p 8:19p 9:19p 10:19p 11:19p 12:26a\n",
|
||||
"Millbrae 8:53a 10:24a 11:24a 12:24p 1:24p 2:24p 3:24p 4:24p 5:24p 6:24p 7:24p 8:24p 9:24p 10:24p 11:24p 12:31a\n",
|
||||
"Broadway 8:57a 10:27a 11:27a 12:27p 1:27p 2:27p 3:27p 4:27p 5:27p 6:27p 7:27p 8:27p 9:27p 10:27p 11:27p 12:35a\n",
|
||||
"Burlingame 9:00a 10:31a 11:31a 12:31p 1:31p 2:31p 3:31p 4:31p 5:31p 6:31p 7:31p 8:31p 9:31p 10:31p 11:31p 12:38a\n",
|
||||
"San Mateo 9:04a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:34p 11:34p 12:41a\n",
|
||||
"Hayward Park 9:07a 10:37a 11:37a 12:37p 1:37p 2:37p 3:37p 4:37p 5:37p 6:37p 7:37p 8:37p 9:37p 10:37p 11:37p 12:45a\n",
|
||||
"Hillsdale 9:10a 10:41a 11:41a 12:41p 1:41p 2:41p 3:41p 4:41p 5:41p 6:41p 7:41p 8:41p 9:41p 10:41p 11:41p 12:48a\n",
|
||||
"Belmont 9:14a 10:44a 11:44a 12:44p 1:44p 2:44p 3:44p 4:44p 5:44p 6:44p 7:44p 8:44p 9:44p 10:44p 11:44p 12:52a\n",
|
||||
"San Carlos 9:17a 10:48a 11:48a 12:48p 1:48p 2:48p 3:48p 4:48p 5:48p 6:48p 7:48p 8:48p 9:48p 10:48p 11:48p 12:55a\n",
|
||||
"Redwood City 9:21a 10:52a 11:52a 12:52p 1:52p 2:52p 3:52p 4:52p 5:52p 6:52p 7:52p 8:52p 9:52p 10:52p 11:52p 12:59a\n",
|
||||
"Menlo Park 9:28a 10:58a 11:58a 12:58p 1:58p 2:58p 3:58p 4:58p 5:58p 6:58p 7:58p 8:58p 9:58p 10:58p 11:58p 1:05a\n",
|
||||
"Palo Alto 9:32a 11:02a 12:02p 1:02p 2:02p 3:02p 4:02p 5:02p 6:02p 7:02p 8:02p 9:02p 10:02p 11:02p 12:02a 1:09a\n",
|
||||
"California Avenue 9:36a 11:06a 12:06p 1:06p 2:06p 3:06p 4:06p 5:06p 6:06p 7:06p 8:06p 9:06p 10:06p 11:06p 12:06a 1:12a\n",
|
||||
"San Antonio 9:41a 11:11a 12:11p 1:11p 2:11p 3:11p 4:11p 5:11p 6:11p 7:11p 8:11p 9:11p 10:11p 11:11p 12:10a 1:17a\n",
|
||||
"Mountain View 9:45a 11:16a 12:16p 1:16p 2:16p 3:16p 4:16p 5:16p 6:16p 7:16p 8:16p 9:16p 10:16p 11:16p 12:15a 1:21a\n",
|
||||
"Sunnyvale 9:51a 11:21a 12:21p 1:21p 2:21p 3:21p 4:21p 5:21p 6:21p 7:21p 8:21p 9:21p 10:21p 11:21p 12:20a 1:26a\n",
|
||||
"Lawrence 9:55a 11:26a 12:26p 1:26p 2:26p 3:26p 4:26p 5:26p 6:26p 7:26p 8:26p 9:26p 10:26p 11:26p 12:25a 1:31a\n",
|
||||
"Santa Clara 10:01a 11:32a 12:32p 1:32p 2:32p 3:32p 4:32p 5:32p 6:32p 7:32p 8:32p 9:32p 10:32p 11:32p 12:31a 1:37a\n",
|
||||
"San Jose Diridon 10:10a 11:40a 12:40p 1:38p 2:40p 3:38p 4:40p 5:38p 6:40p 7:38p 8:40p 9:38p 10:40p 11:38p 12:39a 1:44a\n",
|
||||
"Tamien 10:15a 11:45a 12:45p 2:45p 4:45p 6:45p 8:45p 10:45p 12:44a 1:49aPrinter-Friendly Caltrain Schedule\n",
|
||||
"Northbound – WEEKEND SERVICE to SAN FRANCISCO\n",
|
||||
"Train No. 221 225 229 233 237 241 245 249 253 257 261 265 269 273 *277 *281\n",
|
||||
"Service Types L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2 L2\n",
|
||||
"Tamien 7:12a 9:05a 10:05a 11:05a 1:05p 3:05p 5:05p 7:05p 9:05p 11:05p\n",
|
||||
"San Jose Diridon 7:19a 9:12a 10:12a 11:12a 12:12p 1:12p 2:12p 3:12p 4:12p 5:12p 6:12p 7:12p 8:12p 9:12p 10:19p 11:12p\n",
|
||||
"Santa Clara 7:25a 9:18a 10:18a 11:18a 12:18p 1:18p 2:18p 3:18p 4:18p 5:18p 6:18p 7:18p 8:18p 9:18p 10:25p 11:18p\n",
|
||||
"Lawrence 7:31a 9:24a 10:24a 11:24a 12:24p 1:24p 2:24p 3:24p 4:24p 5:24p 6:24p 7:24p 8:24p 9:24p 10:31p 11:24p\n",
|
||||
"Sunnyvale 7:35a 9:28a 10:28a 11:28a 12:28p 1:28p 2:28p 3:28p 4:28p 5:28p 6:28p 7:28p 8:28p 9:28p 10:35p 11:28p\n",
|
||||
"Mountain View 7:40a 9:34a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:40p 11:34p\n",
|
||||
"San Antonio 7:43a 9:37a 10:37a 11:37a 12:37p 1:37p 2:37p 3:37p 4:37p 5:37p 6:37p 7:37p 8:37p 9:37p 10:44p 11:37p\n",
|
||||
"California Ave 7:48a 9:42a 10:42a 11:42a 12:42p 1:42p 2:42p 3:42p 4:42p 5:42p 6:42p 7:42p 8:42p 9:42p 10:48p 11:42p\n",
|
||||
"Palo Alto 7:52a 9:46a 10:46a 11:46a 12:46p 1:46p 2:46p 3:46p 4:46p 5:46p 6:46p 7:46p 8:46p 9:46p 10:53p 11:46p\n",
|
||||
"Menlo Park 7:55a 9:50a 10:50a 11:50a 12:50p 1:50p 2:50p 3:50p 4:50p 5:50p 6:50p 7:50p 8:50p 9:50p 10:56p 11:50p\n",
|
||||
"Redwood City 8:01a 9:56a 10:56a 11:56a 12:56p 1:56p 2:56p 3:56p 4:56p 5:56p 6:56p 7:56p 8:56p 9:56p 11:02p 11:56p\n",
|
||||
"San Carlos 8:05a 10:01a 11:01a 12:01p 1:01p 2:01p 3:01p 4:01p 5:01p 6:01p 7:01p 8:01p 9:01p 10:01p 11:07p 12:01a\n",
|
||||
"Belmont 8:09a 10:04a 11:04a 12:04p 1:04p 2:04p 3:04p 4:04p 5:04p 6:04p 7:04p 8:04p 9:04p 10:04p 11:10p 12:04a\n",
|
||||
"Hillsdale 8:12a 10:08a 11:08a 12:08p 1:08p 2:08p 3:08p 4:08p 5:08p 6:08p 7:08p 8:08p 9:08p 10:08p 11:14p 12:08a\n",
|
||||
"Hayward Park 8:15a 10:11a 11:11a 12:11p 1:11p 2:11p 3:11p 4:11p 5:11p 6:11p 7:11p 8:11p 9:11p 10:11p 11:17p 12:11a\n",
|
||||
"San Mateo 8:19a 10:15a 11:15a 12:15p 1:15p 2:15p 3:15p 4:15p 5:15p 6:15p 7:15p 8:15p 9:15p 10:15p 11:21p 12:15a\n",
|
||||
"Burlingame 8:22a 10:19a 11:19a 12:19p 1:19p 2:19p 3:19p 4:19p 5:19p 6:19p 7:19p 8:19p 9:19p 10:19p 11:25p 12:19a\n",
|
||||
"Broadway 8:25a 10:22a 11:22a 12:22p 1:22p 2:22p 3:22p 4:22p 5:22p 6:22p 7:22p 8:22p 9:22p 10:22p 11:28p 12:22a\n",
|
||||
"Millbrae 8:29a 10:26a 11:26a 12:26p 1:26p 2:26p 3:26p 4:26p 5:26p 6:26p 7:26p 8:26p 9:26p 10:26p 11:32p 12:26a\n",
|
||||
"San Bruno 8:34a 10:30a 11:30a 12:30p 1:30p 2:30p 3:30p 4:30p 5:30p 6:30p 7:30p 8:30p 9:30p 10:30p 11:37p 12:30a\n",
|
||||
"S. San Francisco 8:38a 10:34a 11:34a 12:34p 1:34p 2:34p 3:34p 4:34p 5:34p 6:34p 7:34p 8:34p 9:34p 10:34p 11:41p 12:34a\n",
|
||||
"Bayshore 8:44a 10:41a 11:41a 12:41p 1:41p 2:41p 3:41p 4:41p 5:41p 6:41p 7:41p 8:41p 9:41p 10:41p 11:47p 12:41a\n",
|
||||
"22nd Street 8:50a 10:46a 11:46a 12:46p 1:46p 2:46p 3:46p 4:46p 5:46p 6:46p 7:46p 8:46p 9:46p 10:46p 11:53p 12:46a\n",
|
||||
"San Francisco 8:56a 10:52a 11:53a 12:53p 1:52p 2:52p 3:52p 4:52p 5:52p 6:52p 7:52p 8:52p 9:52p 10:52p 11:59p 12:52aZONE 2 ZONE 3 ZONE 4 ZONE 4 ZONE 3 ZONE 2 ZONE 1 ZONE 12XX Local\n",
|
||||
"2XX Local\n",
|
||||
"EFFECTIVE September 12, 2022 Timetable subject to change without notice. *On SAP Center event days, Train 277 or Train 281departure from San Jose Diridon station may be delayed and will depart no later than 10:30p or 11:30p respectively.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(base_docs[0].get_content())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42203c70-7ca7-4200-bf47-6282eefca3bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_response = base_query_engine.query(\n",
|
||||
" \"What are the stops (and times) for train no 237 northbound?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "06aa47b6-0f31-4b2d-90f0-bf6c74befd38",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Train No. 237 northbound stops at the following stations and times:\n",
|
||||
"\n",
|
||||
"- Tamien: 1:05p\n",
|
||||
"- San Jose Diridon: 1:12p\n",
|
||||
"- Santa Clara: 1:18p\n",
|
||||
"- Lawrence: 1:24p\n",
|
||||
"- Sunnyvale: 1:28p\n",
|
||||
"- Mountain View: 1:34p\n",
|
||||
"- San Antonio: 1:37p\n",
|
||||
"- California Ave: 1:42p\n",
|
||||
"- Palo Alto: 1:46p\n",
|
||||
"- Menlo Park: 1:50p\n",
|
||||
"- Redwood City: 1:56p\n",
|
||||
"- San Carlos: 2:01p\n",
|
||||
"- Belmont: 2:04p\n",
|
||||
"- Hillsdale: 2:08p\n",
|
||||
"- Hayward Park: 2:11p\n",
|
||||
"- San Mateo: 2:15p\n",
|
||||
"- Burlingame: 2:19p\n",
|
||||
"- Broadway: 2:22p\n",
|
||||
"- Millbrae: 2:26p\n",
|
||||
"- San Bruno: 2:30p\n",
|
||||
"- S. San Francisco: 2:34p\n",
|
||||
"- Bayshore: 2:41p\n",
|
||||
"- 22nd Street: 2:46p\n",
|
||||
"- San Francisco: 2:52p\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4f3c1de7-3351-4cd8-991c-34a777952194",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_response = base_query_engine.query(\n",
|
||||
" \"What are all the trains (and times) that end at Tamien going Southbound?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "513b1007-7508-4fb1-836c-de9353433a67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that the trains don't line up with the times!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "108edb92-76af-406b-a139-8b9e7c6528f2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The trains that end at Tamien going Southbound are:\n",
|
||||
"\n",
|
||||
"- Train 224 at 10:15a\n",
|
||||
"- Train 228 at 11:45a\n",
|
||||
"- Train 240 at 2:45p\n",
|
||||
"- Train 252 at 4:45p\n",
|
||||
"- Train 264 at 6:45p\n",
|
||||
"- Train 276 at 8:45p\n",
|
||||
"- Train 284 at 10:45p\n",
|
||||
"- Train 284 at 12:44a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(base_response))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -10,7 +10,12 @@
|
||||
"\n",
|
||||
"This notebook is a complete walkthrough for using LlamaParse with advanced indexing/retrieval techniques in LlamaIndex over the Apple 10K Filing. \n",
|
||||
"\n",
|
||||
"This allows us to ask sophisticated questions that aren't possible with \"naive\" parsing/indexing techniques with existing models."
|
||||
"This allows us to ask sophisticated questions that aren't possible with \"naive\" parsing/indexing techniques with existing models.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-18-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,7 +24,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-cloud-services"
|
||||
"%pip install llama-cloud-services \"llama-index>=0.13.2,<0.14.0\" \"llama-index-embeddings-huggingface>=0.6.0,<0.7.0\" torchvision \"sentence-transformers<5.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,7 +55,7 @@
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"\n",
|
||||
"\n",
|
||||
"# Using OpenAI API for embeddings/llms\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-proj-...\""
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -64,7 +69,7 @@
|
||||
"from llama_index.core import Settings\n",
|
||||
"\n",
|
||||
"embed_model = OpenAIEmbedding(model_name=\"text-embedding-3-small\")\n",
|
||||
"llm = OpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"llm = OpenAI(model=\"gpt-5-mini\")\n",
|
||||
"\n",
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = embed_model"
|
||||
@@ -91,14 +96,27 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id e403a457-1721-4093-82bf-4a316d2d637a\n"
|
||||
"Started parsing the file under job_id f347cb97-dfe2-4677-991a-5ceba6d9fc6a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"result = await LlamaParse(take_screenshot=True).aparse(\"./apple_2021_10k.pdf\")\n",
|
||||
"result = await LlamaParse(\n",
|
||||
" # The parsing mode\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" # The model to use\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" # Whether to use high resolution OCR (Slower)\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" # Adaptive long table. LlamaParse will try to detect long tables across pages\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Whether to take a screenshot of the page, needed for screenshot-retrieval\n",
|
||||
" take_screenshot=True,\n",
|
||||
").aparse(\"./apple_2021_10k.pdf\")\n",
|
||||
"\n",
|
||||
"markdown_nodes = await result.aget_markdown_nodes(split_by_page=True)\n",
|
||||
"screenshot_image_nodes = await result.aget_image_nodes(\n",
|
||||
@@ -134,7 +152,16 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 20:53:51,246 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 20:53:52,143 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"\n",
|
||||
@@ -158,7 +185,15 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 20:53:53,070 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"\n",
|
||||
@@ -170,7 +205,22 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/loganmarkewich/llama_parse/py/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n",
|
||||
"2025-08-18 20:53:55,230 - INFO - Load pretrained SentenceTransformer: llamaindex/vdr-2b-multi-v1\n",
|
||||
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n",
|
||||
"2025-08-18 20:54:05,369 - INFO - 2 prompts are loaded, with the keys: ['query', 'text']\n",
|
||||
"Generating embeddings: 0%| | 0/82 [00:00<?, ?it/s]2025-08-18 20:54:06,599 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"Generating embeddings: 100%|██████████| 82/82 [00:01<00:00, 61.24it/s]\n",
|
||||
"Generating image embeddings: 100%|██████████| 82/82 [26:06<00:00, 19.11s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_index.core.indices import MultiModalVectorStoreIndex\n",
|
||||
"from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
|
||||
@@ -182,7 +232,7 @@
|
||||
" model_name=\"llamaindex/vdr-2b-multi-v1\",\n",
|
||||
" embed_batch_size=2,\n",
|
||||
" trust_remote_code=True,\n",
|
||||
" cache_folder=\"./hf_cache_2\",\n",
|
||||
" cache_folder=\"./hf_cache\",\n",
|
||||
" device=\"cpu\", # set to \"cuda\" if you have a GPU or remove to auto-detect\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -337,19 +387,58 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:20:29,006 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:20:38,721 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Baseline Query Engine***********\n",
|
||||
"The total fair value of marketable securities in 2020 was $190,516 million.\n",
|
||||
"The total fair value of marketable securities in 2020 was $153,814 million (approximately $153.8 billion).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:20:39,233 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:20:48,185 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Markdown Query Engine***********\n",
|
||||
"The total fair value of marketable securities in 2020 was $191,830 million.\n",
|
||||
"The total fair value was $191,830 million (approximately $191.83 billion).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:20:48,515 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:21:09,275 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********MultiModal Query Engine***********\n",
|
||||
"The total fair value of marketable securities in 2020 was $191,830 million.\n"
|
||||
"The table shows:\n",
|
||||
"\n",
|
||||
"- Total fair value (cash, cash equivalents and marketable securities) in 2020: $191,830 million (≈ $191.83 billion). \n",
|
||||
"- Total marketable securities (current + non‑current) in 2020: $52,927 + $100,887 = $153,814 million (≈ $153.81 billion).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -391,7 +480,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'images/page_41.jpg'"
|
||||
"'images/page_42.jpg'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -415,32 +504,64 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:35:33,281 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:35:40,959 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Baseline Query Engine***********\n",
|
||||
"The effective interest rates for the debt issuances in 2021 were as follows:\n",
|
||||
"\n",
|
||||
"- Floating-rate notes: 0.48% – 0.63%\n",
|
||||
"- Fixed-rate notes: 0.03% – 4.78% for maturities from 2022 to 2060\n",
|
||||
"- Fixed-rate notes issued in the second quarter: 0.75% – 2.81% for maturities from 2026 to 2061\n",
|
||||
"- Fixed-rate notes issued in the fourth quarter: 1.43% – 2.86% for maturities from 2028 to 2061\n",
|
||||
"- Second quarter 2021 fixed-rate notes (2026–2061): effective interest rates 0.75%–2.81%\n",
|
||||
"- Fourth quarter 2021 fixed-rate notes (2028–2061): effective interest rates 1.43%–2.86%\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:35:41,285 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:35:49,132 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Markdown Query Engine***********\n",
|
||||
"The effective interest rates for the debt issuances in 2021 were as follows:\n",
|
||||
"\n",
|
||||
"- Floating-rate notes: 0.48% – 0.63%\n",
|
||||
"- Fixed-rate notes: 0.03% – 4.78% for the 0.000% – 4.650% notes, 0.75% – 2.81% for the 0.700% – 2.800% notes, and 1.43% – 2.86% for the 1.400% – 2.850% notes.\n",
|
||||
"- Floating-rate notes (2022): 0.48% – 0.63%\n",
|
||||
"- Fixed-rate 0.000% – 4.650% notes (2022 – 2060): 0.03% – 4.78%\n",
|
||||
"- Second-quarter 2021 fixed-rate notes (0.700% – 2.800%, 2026 – 2061): 0.75% – 2.81%\n",
|
||||
"- Fourth-quarter 2021 fixed-rate notes (1.400% – 2.850%, 2028 – 2061): 1.43% – 2.86%\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:35:49,411 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:36:06,767 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********MultiModal Query Engine***********\n",
|
||||
"The effective interest rates of all debt issuances in 2021 were as follows:\n",
|
||||
"The effective interest rate ranges reported for the 2021 debt issuances were:\n",
|
||||
"\n",
|
||||
"1. **Floating-rate notes**: 0.48% – 0.63%\n",
|
||||
"2. **Fixed-rate 0.000% – 4.650% notes**: 0.03% – 4.78%\n",
|
||||
"3. **Fixed-rate 0.700% – 2.800% notes**: 0.75% – 2.81%\n",
|
||||
"4. **Fixed-rate 1.400% – 2.850% notes**: 1.43% – 2.86%\n"
|
||||
"- Floating‑rate notes (2022): 0.48% – 0.63% \n",
|
||||
"- Fixed‑rate 0.000% – 4.650% notes (2022–2060): 0.03% – 4.78% \n",
|
||||
"- Q2 2021 fixed‑rate notes (0.700% – 2.800%, maturities 2026–2061): 0.75% – 2.81% \n",
|
||||
"- Q4 2021 fixed‑rate notes (1.400% – 2.850%, maturities 2028–2061): 1.43% – 2.86%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -539,42 +660,66 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:36:07,790 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:36:14,197 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Baseline Query Engine***********\n",
|
||||
"The current state taxes for the years 2019 to 2021 are as follows (in millions):\n",
|
||||
"\n",
|
||||
"- 2021: $1,620\n",
|
||||
"- 2020: $455\n",
|
||||
"- 2019: $475\n",
|
||||
"\n",
|
||||
"This indicates an increase of $1,165 million from 2020 to 2021, a decrease of $20 million from 2018 to 2019, and an increase of $80 million from 2019 to 2020.\n",
|
||||
"State current tax (in millions):\n",
|
||||
"- 2019: +$475 million\n",
|
||||
"- 2020: +$455 million\n",
|
||||
"- 2021: +$1,620 million\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:36:14,584 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:36:22,084 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********Markdown Query Engine***********\n",
|
||||
"The current state taxes for the years 2019 to 2021 are as follows (in millions):\n",
|
||||
"\n",
|
||||
"- **2021**: $1,620\n",
|
||||
"- **2020**: $455\n",
|
||||
"- **2019**: $475\n",
|
||||
"\n",
|
||||
"The changes in current state taxes from year to year are:\n",
|
||||
"\n",
|
||||
"- From 2019 to 2020: Decrease of $20 million\n",
|
||||
"- From 2020 to 2021: Increase of $1,165 million\n",
|
||||
"2019 — Current state taxes: $475 million (change vs prior year: n/a) \n",
|
||||
"2020 — Current state taxes: $455 million (change vs 2019: −$20 million) \n",
|
||||
"2021 — Current state taxes: $1,620 million (change vs 2020: +$1,165 million)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-18 21:36:22,441 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
|
||||
"2025-08-18 21:36:33,498 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"***********MultiModal Query Engine***********\n",
|
||||
"The current state taxes for the years 2019 to 2021 are as follows (in millions):\n",
|
||||
"The current state tax amounts (in millions) per the Note 5 table are:\n",
|
||||
"\n",
|
||||
"- **2021**: $1,620\n",
|
||||
"- **2020**: $455\n",
|
||||
"- **2019**: $475\n",
|
||||
"- 2019: $475\n",
|
||||
"- 2020: $455 (−$20 vs 2019; −4.2%)\n",
|
||||
"- 2021: $1,620 (+$1,165 vs 2020; +256.0%)\n",
|
||||
"\n",
|
||||
"So, the changes are:\n",
|
||||
"- From 2019 to 2020: Decrease of $20 million\n",
|
||||
"- From 2020 to 2021: Increase of $1,165 million\n"
|
||||
"All amounts are in millions of dollars.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -597,7 +742,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-aNC435Vv-py3.10",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -6,32 +6,19 @@
|
||||
"source": [
|
||||
"# Using the Raw API\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use the raw API and how"
|
||||
"This notebook walks through how to use the raw API to parse documents.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-18-2025 | N/A | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-02-02 11:11:39-- https://arxiv.org/pdf/1706.03762.pdf\n",
|
||||
"Resolving arxiv.org (arxiv.org)... 151.101.131.42, 151.101.3.42, 151.101.67.42, ...\n",
|
||||
"Connecting to arxiv.org (arxiv.org)|151.101.131.42|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 2215244 (2.1M) [application/pdf]\n",
|
||||
"Saving to: ‘./attention.pdf’\n",
|
||||
"\n",
|
||||
"./attention.pdf 100%[===================>] 2.11M --.-KB/s in 0.08s \n",
|
||||
"\n",
|
||||
"2024-02-02 11:11:39 (27.3 MB/s) - ‘./attention.pdf’ saved [2215244/2215244]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget \"https://arxiv.org/pdf/1706.03762.pdf\" -O \"./attention.pdf\""
|
||||
]
|
||||
@@ -62,15 +49,23 @@
|
||||
"with open(file_path, \"rb\") as f:\n",
|
||||
" mime_type = mimetypes.guess_type(file_path)[0]\n",
|
||||
" files = {\"file\": (f.name, f, mime_type)}\n",
|
||||
" body = {\n",
|
||||
" \"parse_mode\": \"parse_page_with_agent\",\n",
|
||||
" \"model\": \"openai-gpt-4-1-mini\",\n",
|
||||
" \"high_res_ocr\": True,\n",
|
||||
" \"adaptive_long_table\": True,\n",
|
||||
" \"outlined_table_extraction\": True,\n",
|
||||
" \"output_tables_as_HTML\": True,\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" # send the request, upload the file\n",
|
||||
" url = f\"{base_url}/upload\"\n",
|
||||
" response = requests.post(url, headers=headers, files=files)\n",
|
||||
" response = requests.post(url, headers=headers, files=files, data=body)\n",
|
||||
"\n",
|
||||
"response.raise_for_status()\n",
|
||||
"# get the job id for the result_url\n",
|
||||
"job_id = response.json()[\"id\"]\n",
|
||||
"result_type = \"text\" # or \"markdown\"\n",
|
||||
"result_type = \"json\" # or \"markdown\" or \"json\"\n",
|
||||
"result_url = f\"{base_url}/job/{job_id}/result/{result_type}\"\n",
|
||||
"\n",
|
||||
"# check for the result until its ready\n",
|
||||
@@ -82,8 +77,7 @@
|
||||
" time.sleep(2)\n",
|
||||
"\n",
|
||||
"# download the result\n",
|
||||
"result = response.json()\n",
|
||||
"output = result[result_type]"
|
||||
"result = response.json()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,27 +89,94 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Provided proper attribution is provided, Google hereby grants permission to\n",
|
||||
" reproduce the tables and figures in this paper solely for use in journalistic or\n",
|
||||
" scholarly works.\n",
|
||||
" Attention Is All You Need\n",
|
||||
"arXiv:1706.03762v7 [cs.CL] 2 Aug 2023\n",
|
||||
" Ashish Vaswani∗ Noam Shazeer∗ Niki Parmar∗ Jakob Uszkoreit∗\n",
|
||||
" Google Brain Google Brain Google Research Google Research\n",
|
||||
" avaswani@google.com noam@google.com nikip@google.com usz@google.com\n",
|
||||
" Llion Jones∗ Aidan N. Gomez∗ † Łukasz Kaiser∗\n",
|
||||
" Google Research University of Toronto \n"
|
||||
"dict_keys(['pages', 'job_metadata'])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(output[:1000])"
|
||||
"print(result.keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"dict_keys(['page', 'text', 'md', 'images', 'charts', 'items', 'status', 'originalOrientationAngle', 'links', 'width', 'height', 'triggeredAutoMode', 'parsingMode', 'structuredData', 'noStructuredContent', 'noTextContent', 'pageHeaderMarkdown', 'pageFooterMarkdown', 'confidence'])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result[\"pages\"][0].keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works.\n",
|
||||
"\n",
|
||||
"# Attention Is All You Need\n",
|
||||
"\n",
|
||||
"**Ashish Vaswani*** \n",
|
||||
"Google Brain \n",
|
||||
"avaswani@google.com \n",
|
||||
"\n",
|
||||
"**Noam Shazeer*** \n",
|
||||
"Google Brain \n",
|
||||
"noam@google.com \n",
|
||||
"\n",
|
||||
"**Niki Parmar*** \n",
|
||||
"Google Research \n",
|
||||
"nikip@google.com \n",
|
||||
"\n",
|
||||
"**Jakob Uszkoreit*** \n",
|
||||
"Google Research \n",
|
||||
"usz@google.com \n",
|
||||
"\n",
|
||||
"**Llion Jones*** \n",
|
||||
"Google Research \n",
|
||||
"llion@google.com \n",
|
||||
"\n",
|
||||
"**Aidan N. Gomez* †** \n",
|
||||
"University of Toronto \n",
|
||||
"aidan@cs.toronto.edu \n",
|
||||
"\n",
|
||||
"**Łukasz Kaiser*** \n",
|
||||
"Google Brain \n",
|
||||
"lukaszkaiser@google.com \n",
|
||||
"\n",
|
||||
"**Illia Polosukhin* ‡** \n",
|
||||
"illia.polosukhin@gmail.com \n",
|
||||
"\n",
|
||||
"## Abstract\n",
|
||||
"\n",
|
||||
"The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.\n",
|
||||
"\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"*Equal contribution. Listing order is random. Jakob proposed replacing RNNs with self-attention and started the effort to evaluate this idea. Ashish, with Il\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result[\"pages\"][0][\"md\"][:2000])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-aNC435Vv-py3.11",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -4,7 +4,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LlamaParse Usage"
|
||||
"# LlamaParse Usage\n",
|
||||
"\n",
|
||||
"This notebook walks through the basic usage of LlamaParse.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-18-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -13,7 +20,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-cloud-services"
|
||||
"%pip install \"llama-index>=0.13.2<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,14 +52,21 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 79ae653c-4598-4bd0-ba6e-b3dab7eab57e\n"
|
||||
"Started parsing the file under job_id ebc7e76e-addb-429b-8666-bee9c5832a84\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"result = await LlamaParse().aparse(\"./attention.pdf\")"
|
||||
"result = await LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
").aparse(\"./attention.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -64,7 +78,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 Introduction\n",
|
||||
"1 Introduction\n",
|
||||
"\n",
|
||||
"Recurrent neural networks, long short-term memory [13] and gated recurrent [7] neural networks\n",
|
||||
"in particular, have been firmly established as state of the art approaches in sequence modeling and\n",
|
||||
"transduction problems such as language modeling and machine translation [35, 2, 5]. Numerous\n",
|
||||
@@ -86,7 +101,9 @@
|
||||
"relying entirely on an attention mechanism to draw global dependencies between input and output.\n",
|
||||
"The Transformer allows for significantly more parallelization and can reach a new state of the art in\n",
|
||||
"translation quality after being trained for as little as twelve hours on eight P100 GPUs.\n",
|
||||
"2 Background\n",
|
||||
"\n",
|
||||
"2 Background\n",
|
||||
"\n",
|
||||
"The goal of reducing sequential computation also forms the foundation of the Extended Neural GPU\n",
|
||||
"[16], ByteNet [18] and ConvS2S [9], all of which use convolutional neural networks as basic building\n",
|
||||
"block, computing hidden representations in parallel for all input and output positions. In these models,\n",
|
||||
@@ -107,13 +124,16 @@
|
||||
"entirely on self-attention to compute representations of its input and output without using sequence-\n",
|
||||
"aligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate\n",
|
||||
"self-attention and discuss its advantages over models such as [17, 18] and [9].\n",
|
||||
"3 Model Architecture\n",
|
||||
"\n",
|
||||
"3 Model Architecture\n",
|
||||
"\n",
|
||||
"Most competitive neural sequence transduction models have an encoder-decoder structure [5, 2, 35].\n",
|
||||
"Here, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence\n",
|
||||
"of continuous representations z = (z1, ..., zn). Given z, the decoder then generates an output\n",
|
||||
"sequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regressive\n",
|
||||
"[10], consuming the previously generated symbols as additional input when generating the next.\n",
|
||||
" 2\n"
|
||||
"\n",
|
||||
" 2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -131,39 +151,54 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"arXiv:1706.03762v7 [cs.CL] 2 Aug 2023\n",
|
||||
"\n",
|
||||
"Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works.\n",
|
||||
"\n",
|
||||
"# Attention Is All You Need\n",
|
||||
"\n",
|
||||
"Ashish Vaswani∗ Noam Shazeer∗ Niki Parmar∗ Jakob Uszkoreit∗\n",
|
||||
"**Ashish Vaswani*** \n",
|
||||
"Google Brain \n",
|
||||
"avaswani@google.com \n",
|
||||
"\n",
|
||||
"Google Brain Google Brain Google Research Google Research\n",
|
||||
"**Noam Shazeer*** \n",
|
||||
"Google Brain \n",
|
||||
"noam@google.com \n",
|
||||
"\n",
|
||||
"avaswani@google.com noam@google.com nikip@google.com usz@google.com\n",
|
||||
"**Niki Parmar*** \n",
|
||||
"Google Research \n",
|
||||
"nikip@google.com \n",
|
||||
"\n",
|
||||
"Llion Jones∗ Aidan N. Gomez∗ † Łukasz Kaiser∗\n",
|
||||
"**Jakob Uszkoreit*** \n",
|
||||
"Google Research \n",
|
||||
"usz@google.com \n",
|
||||
"\n",
|
||||
"Google Research University of Toronto Google Brain\n",
|
||||
"**Llion Jones*** \n",
|
||||
"Google Research \n",
|
||||
"llion@google.com \n",
|
||||
"\n",
|
||||
"llion@google.com aidan@cs.toronto.edu lukaszkaiser@google.com\n",
|
||||
"**Aidan N. Gomez* †** \n",
|
||||
"University of Toronto \n",
|
||||
"aidan@cs.toronto.edu \n",
|
||||
"\n",
|
||||
"Illia Polosukhin∗ ‡\n",
|
||||
"**Łukasz Kaiser*** \n",
|
||||
"Google Brain \n",
|
||||
"lukaszkaiser@google.com \n",
|
||||
"\n",
|
||||
"illia.polosukhin@gmail.com\n",
|
||||
"**Illia Polosukhin* ‡** \n",
|
||||
"illia.polosukhin@gmail.com \n",
|
||||
"\n",
|
||||
"# Abstract\n",
|
||||
"## Abstract\n",
|
||||
"\n",
|
||||
"The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.\n",
|
||||
"\n",
|
||||
"∗Equal contribution. Listing order is random. Jakob proposed replacing RNNs with self-attention and started the effort to evaluate this idea. Ashish, with Illia, designed and implemented the first Transformer models and has been crucially involved in every aspect of this work. Noam proposed scaled dot-product attention, multi-head attention and the parameter-free position representation and became the other person involved in nearly every detail. Niki designed, implemented, tuned and evaluated countless model variants in our original codebase and tensor2tensor. Llion also experimented with novel model variants, was responsible for our initial codebase, and efficient inference and visualizations. Lukasz and Aidan spent countless long days designing various parts of and implementing tensor2tensor, replacing our earlier codebase, greatly improving results and massively accelerating our research.\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"†Work performed while at Google Brain.\n",
|
||||
"*Equal contribution. Listing order is random. Jakob proposed replacing RNNs with self-attention and started the effort to evaluate this idea. Ashish, with Illia, designed and implemented the first Transformer models and has been crucially involved in every aspect of this work. Noam proposed scaled dot-product attention, multi-head attention and the parameter-free position representation and became the other person involved in nearly every detail. Niki designed, implemented, tuned and evaluated countless model variants in our original codebase and tensor2tensor. Llion also experimented with novel model variants, was responsible for our initial codebase, and efficient inference and visualizations. Lukasz and Aidan spent countless long days designing various parts of and implementing tensor2tensor, replacing our earlier codebase, greatly improving results and massively accelerating our research. \n",
|
||||
"† Work performed while at Google Brain. \n",
|
||||
"‡ Work performed while at Google Research.\n",
|
||||
"\n",
|
||||
"‡Work performed while at Google Research.\n",
|
||||
"\n",
|
||||
"31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA.\n"
|
||||
"31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -175,7 +210,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+208
-174
@@ -8,7 +8,12 @@
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/demo_insurance.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"In this notebook we will look at how LlamaParse can be used to extract structured coverage information from an insurance policy."
|
||||
"In this notebook we will look at how LlamaParse can be used to extract structured coverage information from an insurance policy.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Deprecated |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,7 +29,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-parse"
|
||||
"%pip install \"llama-index>=0.13.0<0.14.0\" llama-parse"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52,18 +57,6 @@
|
||||
"## Initializing LlamaIndex and LlamaParse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# llama-parse is async-first, running the sync code in a notebook requires the use of nest_asyncio\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -89,9 +82,10 @@
|
||||
"\n",
|
||||
"# for the purpose of this example, we will use the small model embedding and gpt3.5\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-small\")\n",
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"llm = OpenAI(model=\"gpt-5-mini\")\n",
|
||||
"\n",
|
||||
"Settings.llm = llm"
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = embed_model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,15 +104,15 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id b8946573-c911-4e00-8921-1bad1cda3d64\n",
|
||||
"......"
|
||||
"Started parsing the file under job_id 35052045-ce36-4343-9e7c-11e059a59cc2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"documents = LlamaParse(result_type=\"markdown\").load_data(\"./policy.pdf\")"
|
||||
"result = await LlamaParse().aparse(\"./policy.pdf\")\n",
|
||||
"documents = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,19 +124,25 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"## Preamble\n",
|
||||
"Bupa niva Health Insurance\n",
|
||||
"\n",
|
||||
"This ‘Travel Infinity’ Policy is a contract of insurance between You and Us which is subject to payment of full premium in advance and the terms, conditions and exclusions of this Policy. Expense incurred outside the policy period will NOT be covered. Unutilized Sum Insured will expire at the end of the policy year. All applicable benefits, details and limits are mentioned in your Certificate of insurance. We will cover only allopathic treatments in this policy.\n",
|
||||
"# 1. Preamble\n",
|
||||
"\n",
|
||||
"## Defined Terms\n",
|
||||
"This ‘Travel Infinity’ Policy is a contract of insurance between You and Us which is subject to payment of full premium in advance and the terms, conditions and exclusions of this Policy. Expense incurred outside the policy period will NOT be covered. Unutilized Sum Insured will expire at the end of policy year. All applicable benefits, details and limits are mentioned in your Certificate of insurance. We will cover only allopathic treatments in this policy.\n",
|
||||
"\n",
|
||||
"# 2. Defined Terms\n",
|
||||
"\n",
|
||||
"The terms listed below in this Section and used elsewhere in the Policy in Initial Capitals shall have the meaning set out against them in this Section.\n",
|
||||
"\n",
|
||||
"### Standard Definitions\n",
|
||||
"# Standard Definitions\n",
|
||||
"\n",
|
||||
"|2.1|Accident or Accidental|means sudden, unforeseen and involuntary event caused by external, visible and violent means.|\n",
|
||||
"|---|---|---|\n",
|
||||
"|2.2|Co-payment|means a cost sharing requirement under a health insurance policy that provides that the policyholder/insured will bear a specified percentage of the admissible claims a\n"
|
||||
"# 2.1\n",
|
||||
"\n",
|
||||
"Accident or Accidental means sudden, unforeseen and involuntary event caused by external, visible and violent means.\n",
|
||||
"\n",
|
||||
"# 2.2\n",
|
||||
"\n",
|
||||
"Co-payment means a cost sharing requirement under a health insurance policy that provides that the policyholder/insured will bear a specified percentage of the adm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -150,54 +150,14 @@
|
||||
"print(documents[0].text[0:1000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Markdown Element Node Parser\n",
|
||||
"Our markdown element node parser works well for parsing the markdown output of LlamaParse into a set of table and text nodes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.node_parser import MarkdownElementNodeParser\n",
|
||||
"\n",
|
||||
"node_parser = MarkdownElementNodeParser(\n",
|
||||
" llm=OpenAI(model=\"gpt-3.5-turbo-0125\"), num_workers=8\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"nodes = node_parser.get_nodes_from_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_nodes, objects = node_parser.get_nodes_and_objects(nodes)\n",
|
||||
"\n",
|
||||
"recursive_index = VectorStoreIndex(nodes=base_nodes + objects)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_engine = recursive_index.as_query_engine(similarity_top_k=25)"
|
||||
"index = VectorStoreIndex.from_documents(documents)\n",
|
||||
"query_engine = index.as_query_engine()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -216,14 +176,29 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You are covered for the expenses incurred on any alternate travel booking under any mode of transport, up to the limit of the Sum Insured as mentioned in the Certificate of insurance, if the delay of the airlines was caused due to specific reasons outlined in the policy. The amount you are covered for will depend on the specific terms and conditions of your policy, including the maximum coverage limit specified in the Certificate of insurance.\n"
|
||||
"I can’t give an exact dollar amount without the values shown on your Certificate of Insurance. How the claim would be settled:\n",
|
||||
"\n",
|
||||
"1. First check that your policy’s required delay threshold is met (the policy only pays if the delay exceeds the number of hours shown on your Certificate). Also the insurer won’t pay if the delay was publicly known at least 6 hours before departure.\n",
|
||||
"\n",
|
||||
"2. Find which benefit option applies on your Certificate: a fixed payment or reimbursement of actual alternate-travel cost.\n",
|
||||
" - If a fixed payment applies, you will receive the fixed sum listed on the Certificate (regardless of the $450 you paid), subject to the other conditions and any deductible shown.\n",
|
||||
" - If reimbursement applies, the insurer will reimburse up to the Sum Insured shown on the Certificate, but will first deduct any compensation paid by the airline or other sources and then apply the deductible.\n",
|
||||
"\n",
|
||||
"3. Reimbursement formula (if reimbursement option applies):\n",
|
||||
" Payable = max(0, min(Sum Insured, Amount you paid ($450) − airline/other compensation) − Deductible)\n",
|
||||
"\n",
|
||||
"4. Other limits: only one flight-delay claim is payable in the policy period as shown on the Certificate.\n",
|
||||
"\n",
|
||||
"Example: if your Certificate shows Sum Insured $1,000, Deductible $50, and the airline paid no compensation, payable = min(1000,450) − 50 = $400.\n",
|
||||
"\n",
|
||||
"Check your Certificate of Insurance for the delay threshold, whether fixed or reimbursement applies, the Sum Insured and the Deductible, and any airline compensation already received to calculate the exact amount.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query_1 = \"My trip was delay and I paid 45, how much am I cover for?\"\n",
|
||||
"query_1 = \"My flight was delayed 8 hours and I paid $450, how much am I covered for?\"\n",
|
||||
"\n",
|
||||
"response_1 = query_engine.query(query_1)\n",
|
||||
"response_1 = await query_engine.aquery(query_1)\n",
|
||||
"print(str(response_1))"
|
||||
]
|
||||
},
|
||||
@@ -243,15 +218,15 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id ec9e77c9-6ad9-4c9b-9efb-c9f659b0d481\n",
|
||||
"....."
|
||||
"Started parsing the file under job_id c89abe4b-0bb3-4e04-a37f-1da880392346\n",
|
||||
"."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents_with_instruction = LlamaParse(\n",
|
||||
"result = await LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" parsing_instruction=\"\"\"\n",
|
||||
" system_prompt_append=\"\"\"\n",
|
||||
"This document is an insurance policy.\n",
|
||||
"When a benefit/coverage/exclusion is described in the document, append to it a text in the following benefits string format (where coverage could be an exclusion).\n",
|
||||
"\n",
|
||||
@@ -260,7 +235,9 @@
|
||||
"If the document contains a benefits TABLE that describes coverage amounts, do not output it as a table, but instead as a list of benefits strings.\n",
|
||||
" \n",
|
||||
"\"\"\",\n",
|
||||
").load_data(\"./policy.pdf\")"
|
||||
").aparse(\"./policy.pdf\")\n",
|
||||
"\n",
|
||||
"documents_with_instruction = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -279,109 +256,152 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"## Inpatient treatment\n",
|
||||
"\n",
|
||||
"Claim Form (filled and signed by pe Insured)\n",
|
||||
"Hospital Daily Cash\n",
|
||||
"Release of Medical information Form (filled and signed by pe Insured)\n",
|
||||
"Waiver of Deductible\n",
|
||||
"Original papological and diagnostic reports, discharge summary indoor case papers (if any) and prescriptions issued by pe treating Medical practitioner or Network Provider\n",
|
||||
"Optional Co-payment\n",
|
||||
"Adventure Sports Cover\n",
|
||||
"Home to Home Cover\n",
|
||||
"Passport and Visa copy wip Entry Stamp of Country of Visit and exit Stamp from India\n",
|
||||
"Extension to in-patient care\n",
|
||||
"Ambulance Charge\n",
|
||||
"FIR report of police (if applicable)\n",
|
||||
"Inpatient treatment\n",
|
||||
"\n",
|
||||
"## Out-patient treatment\n",
|
||||
"# Claim Form (filled and signed by the Insured)\n",
|
||||
"\n",
|
||||
"Cancer Screening & Mammographic Examination\n",
|
||||
"Original bills and receipts for:\n",
|
||||
"1. Charges paid towards Hospital accommodation, nursing facilities, and oper medical services rendered\n",
|
||||
"2. Fees paid to pe Medical Practitioner and for special nursing charges\n",
|
||||
"3. Charges incurred towards any and all test and / or examinations rendered in connection wip pe treatment\n",
|
||||
"4. Charges incurred towards medicines or drugs purchased from a registered pharmacy oper pan pe Network provider duly supported by pe prescriptions of pe Medical Practitioner attending to pe Insured Person\n",
|
||||
"5. Any oper document as required by pe Company to assist pe Claim\n",
|
||||
"# Hospital Daily Cash\n",
|
||||
"\n",
|
||||
"## Medical evacuation\n",
|
||||
"# Release of Medical information Form (filled and signed by the Insured)\n",
|
||||
"\n",
|
||||
"Medical reports and transportation details issued by the evacuation agency, prescriptions and medical report by the attending Medical Practitioner furnishing the name of the Insured Person and details of treatment rendered along with the statement confirming the necessity of evacuation.\n",
|
||||
"# Waiver of Deductible\n",
|
||||
"\n",
|
||||
"Documentary proof for expenses incurred towards the Medical Evacuation.\n",
|
||||
"# Original pathological and diagnostic reports, discharge summary indoor case papers (if any) and prescriptions issued by the treating Medical practitioner or Network Provider\n",
|
||||
"\n",
|
||||
"## Compassionate visit\n",
|
||||
"# Adventure Sports Cover\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner recommending the presence in the form of special assistance to be rendered by an additional member during the entire period of hospitalization. The certificate shall also specify the minimum period in which person is admitted in the hospital.\n",
|
||||
"# Home to Home Cover\n",
|
||||
"\n",
|
||||
"Discharge summary of the Hospital furnishing details including the date of admission and date of discharge.\n",
|
||||
"# Extension to in-patient care\n",
|
||||
"\n",
|
||||
"Stamped boarding pass with invoice used for the travel by the Immediate Family Member.\n",
|
||||
"# Ambulance Charge\n",
|
||||
"\n",
|
||||
"Copy passport of Immediate Family Member with entry and exit stamp.\n",
|
||||
"# Out-patient treatment\n",
|
||||
"\n",
|
||||
"## Escort of Minor Child\n",
|
||||
"# Cancer Screening & Mammographic Examination\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization.\n",
|
||||
"# New Born baby Cover\n",
|
||||
"\n",
|
||||
"Discharge summary of the Hospital furnishing details including the date of admission and date of discharge.\n",
|
||||
"# Maternity\n",
|
||||
"\n",
|
||||
"Stamped Boarding pass used for the return travel of the child to the Country of Residence.\n",
|
||||
"# Complete pre-existing disease cover\n",
|
||||
"\n",
|
||||
"Stamped Boarding pass of the attendant from the Country of Residence to the place of hospitalization (if attendant is necessary).\n",
|
||||
"# Medical sum insured replenishment in case of hospitalization due to accident\n",
|
||||
"\n",
|
||||
"Copy of passport of the child with entry and exit stamp.\n",
|
||||
"# Waiver of sublimit for insured above 60 years of age\n",
|
||||
"\n",
|
||||
"## Upgradation to Business Class\n",
|
||||
"# Psychiatric Counseling\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization.\n",
|
||||
"# Physiotherapy\n",
|
||||
"\n",
|
||||
"# Terrorism cover\n",
|
||||
"\n",
|
||||
"# Medical tele-consultation\n",
|
||||
"\n",
|
||||
"# Medical evacuation\n",
|
||||
"\n",
|
||||
"Medical reports and transportation details issued by the evacuation agency, prescriptions and medical report by the attending Medical Practitioner furnishing the name of the Insured Person and details of treatment rendered along with the statement confirm the necessity of evacuation. Documentary proof for expenses incurred towards the Medical Evacuation.\n",
|
||||
"\n",
|
||||
"# Compassionate visit\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner recommending the presence in the form of special assistance to be rendered by an additional member during the entire period of hospitalization. The certificate shall also specify the minimum period in which person is admitted in the hospital. Discharge summary of the Hospital furnishing details including the date of admission and date of discharge. Stamped boarding pass with invoice used for the travel by the Immediate Family Member. Copy passport of Immediate Family Member with entry and exit stamp.\n",
|
||||
"\n",
|
||||
"# Escort of Minor Child\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization. Discharge summary of the Hospital furnishing details including the date of admission and date of discharge, Stamped Boarding pass used for the return travel of the child to the Country of Residence. Stamped Boarding pass of the attendant from the Country of Residence to the place of hospitalization (if attendant is necessary). Copy of passport of the child with entry and exit stamp.\n",
|
||||
"\n",
|
||||
"# Upgradation to Business Class\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization. Discharge summary of the Hospital furnishing the details including the date of admission and date of discharge.\n",
|
||||
"\n",
|
||||
"Product Name: Travel infinity\n",
|
||||
"\n",
|
||||
"Product UIN: NBHTGBP22011V012223\n",
|
||||
"\n",
|
||||
"Discharge summary of the Hospital furnishing the details including the date of admission and date of discharge.\n",
|
||||
"\n",
|
||||
"Product Name: Travel infinity | Product UIN: NBHTGBP22011V012223\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"=========================================================\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Insurance Policy\n",
|
||||
"\n",
|
||||
"## Benefits:\n",
|
||||
"# Claim Form\n",
|
||||
"\n",
|
||||
"- For Inpatient treatment and in this condition when admitted to a hospital, the coverage is reimbursement for medical expenses incurred.\n",
|
||||
"- For Hospital Daily Cash and in this condition when hospitalized, the coverage is daily cash benefit.\n",
|
||||
"- For Waiver of Deductible and in this condition when a deductible is applicable, the coverage is waiver of the deductible amount.\n",
|
||||
"- For Optional Co-payment and in this condition when a co-payment is required, the coverage is optional co-payment.\n",
|
||||
"- For Adventure Sports Cover and in this condition when participating in adventure sports, the coverage is coverage for injuries related to adventure sports.\n",
|
||||
"- For Home to Home Cover and in this condition when requiring medical evacuation, the coverage is assistance for repatriation to home country.\n",
|
||||
"- For Extension to in-patient care and in this condition when extended hospital stay is necessary, the coverage is extension of coverage for in-patient care.\n",
|
||||
"- For Ambulance Charge and in this condition when ambulance services are utilized, the coverage is reimbursement for ambulance charges.\n",
|
||||
"- For Out-patient treatment and in this condition when receiving outpatient medical care, the coverage is reimbursement for outpatient medical expenses.\n",
|
||||
"- For Cancer Screening & Mammographic Examination and in this condition when undergoing cancer screening or mammographic examination, the coverage is coverage for these preventive services.\n",
|
||||
"- For New Born baby Cover and in this condition when a newborn is covered under the policy, the coverage is medical expenses coverage for the newborn.\n",
|
||||
"- For Maternity and in this condition when maternity services are required, the coverage is coverage for maternity expenses.\n",
|
||||
"- For Complete pre-existing disease cover and in this condition when seeking treatment for pre-existing conditions, the coverage is coverage for pre-existing conditions.\n",
|
||||
"- For Medical sum insured replenishment in case of hospitalization due to accident and in this condition when hospitalized due to an accident, the coverage is replenishment of the sum insured.\n",
|
||||
"- For Waiver of sublimit for insured above 60 years of age and in this condition when the insured is above 60 years of age, the coverage is waiver of sublimits.\n",
|
||||
"- For Psychiatric Counseling and in this condition when seeking psychiatric counseling, the coverage is coverage for psychiatric counseling services.\n",
|
||||
"- For Physiotherapy and in this condition when undergoing physiotherapy, the coverage is coverage for physiotherapy sessions.\n",
|
||||
"- For Terrorism cover and in this condition when affected by terrorism, the coverage is coverage for medical expenses related to terrorism incidents.\n",
|
||||
"- For Medical tele-consultation and in this condition when consulting a medical practitioner remotely, the coverage is coverage for tele-consultation services.\n",
|
||||
"- For Medical evacuation and in this condition when requiring medical evacuation, the coverage is coverage for medical evacuation services.\n",
|
||||
"- For Compassionate visit and in this condition when requiring a compassionate visit, the coverage is coverage for travel expenses for a family member to visit.\n",
|
||||
"- For Escort of Minor Child and in this condition when escorting a minor child for medical treatment, the coverage is coverage for escort services for the child.\n",
|
||||
"- For Upgradation to Business Class and in this condition when requiring upgradation to business class for medical travel, the coverage is coverage for upgradation to business class.\n"
|
||||
"Inpatient treatment\n",
|
||||
"\n",
|
||||
"- Claim Form (filled and signed by the Insured)\n",
|
||||
"- Release of Medical information Form (filled and signed by the Insured)\n",
|
||||
"- Original pathological and diagnostic reports, discharge summary indoor case papers (if any) and prescriptions issued by the treating Medical practitioner or Network Provider\n",
|
||||
"- Passport and Visa copy with Entry Stamp of Country of Visit and exit Stamp from India\n",
|
||||
"- FIR report of police (if applicable)\n",
|
||||
"\n",
|
||||
"Hospital Daily Cash\n",
|
||||
"\n",
|
||||
"Waiver of Deductible\n",
|
||||
"\n",
|
||||
"Optional Co-payment\n",
|
||||
"\n",
|
||||
"Adventure Sports Cover\n",
|
||||
"\n",
|
||||
"Home to Home Cover\n",
|
||||
"\n",
|
||||
"Extension to in-patient care\n",
|
||||
"\n",
|
||||
"Ambulance Charge\n",
|
||||
"\n",
|
||||
"Out-patient treatment\n",
|
||||
"\n",
|
||||
"Cancer Screening & Mammographic Examination\n",
|
||||
"\n",
|
||||
"New Born baby Cover\n",
|
||||
"\n",
|
||||
"Maternity\n",
|
||||
"\n",
|
||||
"Complete pre-existing disease cover\n",
|
||||
"\n",
|
||||
"Medical sum insured replenishment in case of hospitalization due to accident\n",
|
||||
"\n",
|
||||
"Waiver of sublimit for insured above 60 years of age\n",
|
||||
"\n",
|
||||
"Psychiatric Counseling\n",
|
||||
"\n",
|
||||
"Physiotherapy\n",
|
||||
"\n",
|
||||
"Terrorism cover\n",
|
||||
"\n",
|
||||
"Medical tele-consultation\n",
|
||||
"\n",
|
||||
"Medical evacuation\n",
|
||||
"\n",
|
||||
"Medical reports and transportation details issued by the evacuation agency, prescriptions and medical report by the attending Medical Practitioner furnishing the name of the Insured Person and details of treatment rendered along with the statement confirming the necessity of evacuation. Documentary proof for expenses incurred towards the Medical Evacuation.\n",
|
||||
"\n",
|
||||
"Compassionate visit\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner recommending the presence in the form of special assistance to be rendered by an additional member during the entire period of hospitalization. The certificate shall also specify the minimum period in which the person is admitted in the hospital. Discharge summary of the Hospital furnishing details including the date of admission and date of discharge. Stamped boarding pass with invoice used for the travel by the Immediate Family Member. Copy passport of Immediate Family Member with entry and exit stamp.\n",
|
||||
"\n",
|
||||
"Escort of Minor Child\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization. Discharge summary of the Hospital furnishing details including the date of admission and date of discharge, Stamped Boarding pass used for the return travel of the child to the Country of Residence. Stamped Boarding pass of the attendant from the Country of Residence to the place of hospitalization (if attendant is necessary). Copy of passport of the child with entry and exit stamp.\n",
|
||||
"\n",
|
||||
"Upgradation to Business Class\n",
|
||||
"\n",
|
||||
"A certificate from the Medical Practitioner specifying the minimum period of Hospitalization. Discharge summary of the Hospital furnishing the details including the date of admission and date of discharge.\n",
|
||||
"\n",
|
||||
"Product Name: Travel infinity\n",
|
||||
"\n",
|
||||
"Product UIN: NBHTGBP22011V012223\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"target_page = 45\n",
|
||||
"pages_vanilla = documents[0].text.split(\"\\n---\\n\")\n",
|
||||
"pages_with_instructions = documents_with_instruction[0].text.split(\"\\n---\\n\")\n",
|
||||
"\n",
|
||||
"print(pages_vanilla[target_page])\n",
|
||||
"print(documents[target_page].text)\n",
|
||||
"print(\"\\n\\n=========================================================\\n\\n\")\n",
|
||||
"print(pages_with_instructions[target_page])"
|
||||
"print(documents_with_instruction[target_page].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -390,21 +410,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"node_parser_instruction = MarkdownElementNodeParser(\n",
|
||||
" llm=OpenAI(model=\"gpt-3.5-turbo-0125\"), num_workers=8\n",
|
||||
")\n",
|
||||
"nodes_instruction = node_parser.get_nodes_from_documents(documents_with_instruction)\n",
|
||||
"(\n",
|
||||
" base_nodes_instruction,\n",
|
||||
" objects_instruction,\n",
|
||||
") = node_parser_instruction.get_nodes_and_objects(nodes_instruction)\n",
|
||||
"\n",
|
||||
"recursive_index_instruction = VectorStoreIndex(\n",
|
||||
" nodes=base_nodes_instruction + objects_instruction\n",
|
||||
")\n",
|
||||
"query_engine_instruction = recursive_index_instruction.as_query_engine(\n",
|
||||
" similarity_top_k=25\n",
|
||||
")"
|
||||
"instruction_index = VectorStoreIndex.from_documents(documents_with_instruction)\n",
|
||||
"query_engine_instruction = instruction_index.as_query_engine()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -426,21 +433,46 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Vanilla:\n",
|
||||
"You are covered for the amount you paid due to the trip delay, up to the limit specified in the certificate of insurance.\n",
|
||||
"I can’t give an exact payout without details from your Certificate of Insurance. What matters is which benefit applies and the certificate values. Here’s how to determine the amount and some examples:\n",
|
||||
"\n",
|
||||
"What to check on your certificate (send these if you want a precise calculation)\n",
|
||||
"- Which benefit is being used: Flight Delay (alternate travel booking reimbursement or fixed amount) or Trip Delay (fixed amount per block of hours). \n",
|
||||
"- The minimum delay threshold (the number of hours the delay must exceed). \n",
|
||||
"- Whether the policy pays reimbursement or a fixed amount (and, if fixed, the amount per block and length of each block). \n",
|
||||
"- Sum Insured / maximum limit for that benefit. \n",
|
||||
"- Deductible (amount you must absorb per claim). \n",
|
||||
"- Any compensation already paid by the airline or other source (this is deducted from the insurer’s payment). \n",
|
||||
"- Reason for the delay and whether it’s an excluded reason (e.g., delay was publicly known 6+ hours before departure).\n",
|
||||
"\n",
|
||||
"How to calculate (general rules)\n",
|
||||
"- If the policy reimburses actual alternate travel costs: insurer pays up to the Sum Insured, but subtract any compensation from the carrier and subtract the deductible. Payment = min(Sum Insured, your expense) − carrier compensation − deductible.\n",
|
||||
"- If the policy pays a fixed amount per block of hours: determine how many blocks your 8-hour delay covers (e.g., if a block is 4 hours, 8 hours = 2 blocks). Payment = blocks × fixed amount (subject to any stated maximum and any applicable deductible/offsets).\n",
|
||||
"\n",
|
||||
"Two simple examples\n",
|
||||
"- Reimbursement example: Sum Insured ≥ $450, deductible $50, airline paid $0 → insurer would pay $450 − $50 = $400. \n",
|
||||
"- Fixed-per-block example: certificate pays $100 per 4-hour block. 8 hours = 2 blocks → insurer would pay 2 × $100 = $200 (subject to any max limit or deductible if applicable).\n",
|
||||
"\n",
|
||||
"If you share the certificate values (which benefit, sum insured, deductible, fixed-per-block amount if any, and any airline compensation), I’ll compute the exact amount.\n",
|
||||
"With instructions:\n",
|
||||
"For Trip Delay coverage, you are covered for a fixed benefit amount as mentioned in the certificate of insurance for every block of hours of delay.\n"
|
||||
"The amount payable depends on the Trip Delay benefit sum insured you chose in your policy certificate. Available Trip Delay benefit options are: 1K, 2K, 3K, 4K, 5K, 7.5K, 10K, 15K and 20K. The insurer pays the selected benefit amount for each block of delay hours as defined in your certificate (maximum up to 24 hours).\n",
|
||||
"\n",
|
||||
"So:\n",
|
||||
"- If your chosen Trip Delay benefit is at least equal to $450, the policy can cover your $450 expense (subject to the policy terms and exclusions).\n",
|
||||
"- If your chosen benefit is less than $450, the insurer will pay only up to the chosen benefit amount.\n",
|
||||
"\n",
|
||||
"Check your certificate to confirm which Trip Delay sum insured you purchased and whether any exclusions (for example, delays announced ≥6 hours before departure) apply.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query_1 = \"My trip was delayed and I paid 45, how much am I covered for?\"\n",
|
||||
"query_1 = \"My flight was delayed 8 hours and I paid $450, how much am I covered for?\"\n",
|
||||
"\n",
|
||||
"response_1 = query_engine.query(query_1)\n",
|
||||
"response_1 = await query_engine.aquery(query_1)\n",
|
||||
"print(\"Vanilla:\")\n",
|
||||
"print(response_1)\n",
|
||||
"\n",
|
||||
"print(\"With instructions:\")\n",
|
||||
"response_1_i = query_engine_instruction.query(query_1)\n",
|
||||
"response_1_i = await query_engine_instruction.aquery(query_1)\n",
|
||||
"print(response_1_i)"
|
||||
]
|
||||
},
|
||||
@@ -461,21 +493,23 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Vanilla:\n",
|
||||
"Baby food is not explicitly mentioned in the provided context information regarding insurance coverages and benefits.\n",
|
||||
"No. Food and beverages (including baby food) are excluded as expenses not linked to treatment. The policy only covers medical treatment and specified newborn items (e.g., emergency inpatient care and vaccinations — vaccinations limited to USD 500) and explicitly excludes \"baby charges\" unless specifically indicated.\n",
|
||||
"With instructions:\n",
|
||||
"Baby food is excluded from coverage according to the policy terms.\n"
|
||||
"No. Baby food is not covered. The policy pays medical treatment expenses and expressly excludes items not linked to treatment (for example food and beverages), and it also lists \"baby charges\" as not payable unless specifically indicated. \n",
|
||||
"\n",
|
||||
"Newborn medical treatment and vaccinations can be covered under the newborn/maternity benefits (vaccination cover is limited and subject to the policy's special conditions, waiting periods and deductibles), so check your certificate of insurance for any specific limits or endorsements.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query_2 = \"I just had a baby, is baby food covered?\"\n",
|
||||
"\n",
|
||||
"response_2 = query_engine.query(query_2)\n",
|
||||
"response_2 = await query_engine.aquery(query_2)\n",
|
||||
"print(\"Vanilla:\")\n",
|
||||
"print(response_2)\n",
|
||||
"\n",
|
||||
"print(\"With instructions:\")\n",
|
||||
"response_2_i = query_engine_instruction.query(query_2)\n",
|
||||
"response_2_i = await query_engine_instruction.aquery(query_2)\n",
|
||||
"print(response_2_i)"
|
||||
]
|
||||
},
|
||||
@@ -489,30 +523,30 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Vanilla:\n",
|
||||
"Gauze used in your operation would typically be covered under the \"Emergency In-patient Medical Treatment\" or \"Emergency In-patient Medical Treatment with OPD\" benefits of the policy.\n",
|
||||
"Gauze (including gauze soft) used in your operation is included within the procedure charges. It is subsumed into the surgical/procedure fee and is not payable as a separate item.\n",
|
||||
"With instructions:\n",
|
||||
"Gauze is not covered for use in your operation as it falls under the category of items that are excluded from coverage in the insurance policy.\n"
|
||||
"Gauze used during your operation is included in the procedure charges. Its cost is subsumed into the procedure/surgical fee and will not be reimbursed as a separate line item.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query_3 = \"How is gauze used in my operation covered?\"\n",
|
||||
"\n",
|
||||
"response_3 = query_engine.query(query_3)\n",
|
||||
"response_3 = await query_engine.aquery(query_3)\n",
|
||||
"print(\"Vanilla:\")\n",
|
||||
"print(response_3)\n",
|
||||
"\n",
|
||||
"print(\"With instructions:\")\n",
|
||||
"response_3_i = query_engine_instruction.query(query_3)\n",
|
||||
"response_3_i = await query_engine_instruction.aquery(query_3)\n",
|
||||
"print(response_3_i)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -11,7 +11,12 @@
|
||||
"\n",
|
||||
"This notebook shows you how to use LlamaParse JSON mode with LlamaIndex to build a simple multimodal RAG pipeline.\n",
|
||||
"\n",
|
||||
"Using JSON mode gives you back a list of json dictionaries, which contains both text and images. You can then download these images and use a multimodal model to extract information and index them."
|
||||
"Using JSON mode gives you back a list of JSON dictionaries, which contain both text and images. You can then download these images and use a multimodal model to extract information and index them.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,9 +37,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index\n",
|
||||
"%pip install llama-index-core\n",
|
||||
"%pip install llama-index-llms-anthropic\n",
|
||||
"%pip install llama-index-embeddings-huggingface\n",
|
||||
"%pip install \"llama-index-core>=0.13.2,<0.14.0\"\n",
|
||||
"%pip install \"llama-index-llms-anthropic>=0.8.4,<0.9.0\"\n",
|
||||
"%pip install \"llama-index-embeddings-huggingface>=0.6.0,<0.7.0\"\n",
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
@@ -48,10 +53,10 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# API access to llama-cloud\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"\n",
|
||||
"\n",
|
||||
"# Using Anthropic API for embeddings/LLMs\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = \"sk-\""
|
||||
"# Using Anthropic API for LLMs\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = \"sk-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -63,7 +68,7 @@
|
||||
"source": [
|
||||
"from llama_index.llms.anthropic import Anthropic\n",
|
||||
"\n",
|
||||
"llm = Anthropic(model=\"claude-3-5-sonnet-20241022\")"
|
||||
"llm = Anthropic(model=\"claude-4-sonnet-20250514\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -71,12 +76,21 @@
|
||||
"execution_count": null,
|
||||
"id": "700f48e8-8b52-41f3-90f9-144d5fdd5c52",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/loganmarkewich/llama_parse/py/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_index.core import Settings\n",
|
||||
"\n",
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = \"local:BAAI/bge-small-en-v1.5\""
|
||||
"Settings.embed_model = \"local:Qwen/Qwen3-Embedding-0.6B\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -119,14 +133,23 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id cf5a4f51-1af8-47f7-9b3d-80a905d06b89\n"
|
||||
"Started parsing the file under job_id 33d93a46-1b43-4619-b4ff-0c272cbca4b3\n",
|
||||
".."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(take_screenshot=True)\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = await parser.aparse(\"./uber_10q_march_2022.pdf\")"
|
||||
]
|
||||
},
|
||||
@@ -140,7 +163,7 @@
|
||||
"text_nodes = await result.aget_text_nodes(split_by_page=True)\n",
|
||||
"image_nodes = await result.aget_image_nodes(\n",
|
||||
" include_screenshot_images=True,\n",
|
||||
" include_object_images=True,\n",
|
||||
" include_object_images=False,\n",
|
||||
" image_download_dir=\"./uber_10q_images\",\n",
|
||||
")"
|
||||
]
|
||||
@@ -160,24 +183,14 @@
|
||||
"execution_count": null,
|
||||
"id": "36012145-5521-4ddb-a53e-df9ebd1ca8dd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mkdir: llama2_images: File exists\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!mkdir -p llama2_images\n",
|
||||
"\n",
|
||||
"from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock\n",
|
||||
"from llama_index.core.schema import ImageNode, TextNode\n",
|
||||
"from llama_index.llms.anthropic import Anthropic\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_image_text_nodes(image_nodes: list[ImageNode]):\n",
|
||||
"async def get_image_text_nodes(image_nodes: list[ImageNode]):\n",
|
||||
" \"\"\"Extract out text from images using a multimodal model.\"\"\"\n",
|
||||
" llm = Anthropic(model=\"claude-3-5-haiku-20241022\", max_tokens=300)\n",
|
||||
" img_text_nodes = []\n",
|
||||
@@ -190,7 +203,7 @@
|
||||
" ImageBlock(path=image_path),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" response = llm.chat([message])\n",
|
||||
" response = await llm.achat([message])\n",
|
||||
" text_node = TextNode(\n",
|
||||
" text=str(response.message.content), metadata={\"path\": image_path}\n",
|
||||
" )\n",
|
||||
@@ -206,7 +219,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_text_nodes = get_image_text_nodes(image_nodes)"
|
||||
"image_text_nodes = await get_image_text_nodes(image_nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -218,7 +231,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The image shows a bar graph titled \"Monthly Active Platform Consumers (in millions)\". The graph displays data from Q2 2020 to Q1 2022 over 8 quarters. The number of monthly active platform consumers starts at 55 million in Q2 2020 and steadily increases each quarter, reaching 115 million by Q1 2022. The graph illustrates consistent quarter-over-quarter growth in this metric over the nearly 2 year time period shown.'"
|
||||
"'Alt text: United States Securities and Exchange Commission Form 10-Q for Uber Technologies, Inc., dated for the quarterly period ended March 31, 2022. The document shows company details including incorporation state (Delaware), address (1515 3rd Street, San Francisco), and indicates Uber is a large accelerated filer listed on the New York Stock Exchange with the trading symbol UBER.'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -272,9 +285,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The bar graph titled \"Monthly Active Platform Consumers (in millions)\" shows the number of monthly active consumers on Uber's platform over a period of 8 quarters from Q2 2020 to Q1 2022. \n",
|
||||
"\n",
|
||||
"The graph indicates steady quarter-over-quarter growth in this metric, starting at 55 million monthly active platform consumers in Q2 2020 and increasing each quarter to reach 115 million by Q1 2022. This represents consistent growth in Uber's user base on their platform over the nearly 2 year period shown in the graph.\n"
|
||||
"The bar graph titled 'Monthly Active Platform Consumers' shows the growth in platform users measured in millions from Q2 2020 to Q1 2022. The graph demonstrates a steady increase in the number of consumers using the platform, starting at 55 million users in Q2 2020 and rising to 115 million users in Q1 2022. The visualization displays notable growth between quarters, with the vertical axis representing the number of consumers in millions and the horizontal axis showing the quarterly progression over this two-year period.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -296,25 +307,21 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Based on the context provided, some of the main risk factors for Uber include:\n",
|
||||
"Based on the financial documents provided, I can identify some key risk factors for Uber, though the context is limited to specific pages:\n",
|
||||
"\n",
|
||||
"- A significant percentage of Uber's bookings come from large metropolitan areas, which could be negatively impacted by various economic, social, weather, regulatory and other conditions, including COVID-19.\n",
|
||||
"**Legal and Regulatory Risks:**\n",
|
||||
"- Driver classification issues pose significant business risks, as legal determinations about whether drivers are employees or independent contractors could substantially impact Uber's operations and cost structure.\n",
|
||||
"\n",
|
||||
"- Uber may fail to successfully offer autonomous vehicle technologies on its platform or these technologies may not perform as expected. \n",
|
||||
"**Operational Risks:**\n",
|
||||
"- The company continues to report net losses, indicating ongoing profitability challenges across its business segments.\n",
|
||||
"\n",
|
||||
"- Retaining and attracting high-quality personnel is important for Uber's business and continued attrition could adversely impact the company.\n",
|
||||
"**Business Model Risks:**\n",
|
||||
"- Uber operates across multiple segments (Mobility, Delivery, and Freight), which creates exposure to various market conditions and regulatory environments in different industries.\n",
|
||||
"\n",
|
||||
"- Security breaches, data privacy issues, cyberattacks and unauthorized access to Uber's proprietary data and systems pose risks.\n",
|
||||
"**Geographic Concentration Risk:**\n",
|
||||
"- The company has operations across different geographic regions, which exposes it to varying regulatory frameworks, economic conditions, and competitive landscapes in different markets.\n",
|
||||
"\n",
|
||||
"- Uber is subject to climate change risks, both physical and transitional, that could adversely impact its business if not managed properly. \n",
|
||||
"\n",
|
||||
"- Uber relies on third parties for open marketplaces to distribute its platform and software, and interference from these third parties could harm its business.\n",
|
||||
"\n",
|
||||
"- Uber will require additional capital to support its growth and this capital may not be available on reasonable terms.\n",
|
||||
"\n",
|
||||
"- Acquisitions and integrations carry risks if Uber is unable to successfully identify and integrate suitable businesses.\n",
|
||||
"\n",
|
||||
"- Extensive government regulations around payments, financial services, data privacy and other areas pose compliance risks and challenges for Uber's business model in certain jurisdictions.\n"
|
||||
"However, the provided context appears to be from specific pages of financial reports that focus primarily on financial metrics and segment information. A complete assessment of Uber's risk factors would typically be found in the dedicated risk factors section of their SEC filings, which is not included in the available context.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -327,7 +334,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -11,7 +11,12 @@
|
||||
"\n",
|
||||
"The `JobResult` object is the main object returned by the LlamaParse API. It contains all the information about the job, including the parsed data, metadata, and any errors.\n",
|
||||
"\n",
|
||||
"This notebook walks through each component of the `JobResult` object and shows you what it contains."
|
||||
"This notebook walks through each component of the `JobResult` object and shows you what it contains.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -94,7 +99,14 @@
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse()\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"./san_francisco_budget_2023.pdf\")"
|
||||
]
|
||||
},
|
||||
@@ -311,7 +323,16 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser = LlamaParse(take_screenshot=True)\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Take screenshot of the page\n",
|
||||
" take_screenshot=True,\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"./san_francisco_budget_2023.pdf\")"
|
||||
]
|
||||
},
|
||||
@@ -481,7 +502,16 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser = LlamaParse(annotate_links=True)\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Annotate links in the document\n",
|
||||
" annotate_links=True,\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"./basic-link-1.pdf\")"
|
||||
]
|
||||
},
|
||||
@@ -532,7 +562,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "llama-parse-aNC435Vv-py3.10",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
+319
-256
@@ -11,7 +11,12 @@
|
||||
"\n",
|
||||
"You can specify 80+ different languages; see this file for the full list of supported languages: https://github.com/run-llama/llama_cloud_services/blob/main/llama_parse/base.py.\n",
|
||||
"\n",
|
||||
"This notebook shows a demo of this in action. "
|
||||
"This notebook shows a demo of this in action. \n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -21,7 +26,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-parse"
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -66,15 +71,24 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 476966e1-9e04-49e7-a5dc-952b053b8b94\n",
|
||||
"......"
|
||||
"Started parsing the file under job_id e1efd750-ed1f-4aaa-8a46-ed07b2ad6f52\n",
|
||||
"..."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(language=\"fr\")\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Set the language to French!\n",
|
||||
" language=\"fr\",\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"./treasury_report.pdf\")\n",
|
||||
"documents = result.get_text_documents(split_by_page=False)"
|
||||
]
|
||||
@@ -89,95 +103,117 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ET GESTION DE LA DETTE DE L’ÉTAT\n",
|
||||
" P.56 FOCUS OAT VERTES\n",
|
||||
" P.60 CONTRÔLE DES RISQUES & POST-MARCHÉ\n",
|
||||
" Chiffres de l’exercice 2022 P.64 À 105\n",
|
||||
" P.65 ACTIVITÉ DE L’AFT\n",
|
||||
" P.84 RAPPORT STATISTIQUE\n",
|
||||
" FICHES TECHNIQUES GLOSSAIRES LISTE DES ABRÉVIATIONS\n",
|
||||
" P.106 P.118 P.122\n",
|
||||
" AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022 3\n",
|
||||
"TIVITÉ DE L’AFT\n",
|
||||
" P.84 RAPPORT STATISTIQUE\n",
|
||||
"\n",
|
||||
" FICHES TECHNIQUES GLOSSAIRES LISTE DES ABRÉVIATIONS\n",
|
||||
" P.106 P.118 P.122\n",
|
||||
"\n",
|
||||
" AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022 3\n",
|
||||
"---\n",
|
||||
" Édito\n",
|
||||
" 111 Avec une croissance\n",
|
||||
" de +2,5 %, la France a illustré\n",
|
||||
" une nouvelle fois sa résilience\n",
|
||||
" économique face aux chocs.\n",
|
||||
" Édito\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" Avec une croissance\n",
|
||||
" de +2,5 %, la France a illustré\n",
|
||||
" une nouvelle fois sa résilience\n",
|
||||
" économique face aux chocs.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"4 AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022\n",
|
||||
"---\n",
|
||||
" L’économie française en 2022 :\n",
|
||||
" résilience face aux chocs géopolitiques\n",
|
||||
" et économiques\n",
|
||||
" sa résilience économique face aux lors du dernier trimestre de 2022.\n",
|
||||
"LE DÉBUT DE chocs. Cette croissance a été permise Malgré un climat des affaires impacté\n",
|
||||
"L’ANNÉE 2022 grâce à une forte demande intérieure par l’inflation, le soutien apporté\n",
|
||||
" alimentée par le dynamisme de aux TPE/PME leur a permis de faire\n",
|
||||
"SEMBLAIT l’investissement et, en dépit de face aux défis énergétiques tout en\n",
|
||||
" l’inflation, d’une résilience de la préservant l’emploi.\n",
|
||||
"ENGAGÉ DANS consommation des ménages sur une\n",
|
||||
" grande partie de l’année. Afin de combattre l’inflation qui a\n",
|
||||
"UNE DYNAMIQUE largement dépassé la cible de 2 %,\n",
|
||||
" Le taux d’inflation des prix à la la BCE, de concert avec les banques\n",
|
||||
"EFFICACE DE consommation français est resté l’un centrales des principales économies\n",
|
||||
"SORTIE DE CRISE des plus bas d’Europe avec +6,0 % développées, a adapté sa fonction de\n",
|
||||
" en 2022, s’appuyant, d’une part, sur réaction en mettant fin aux politiques\n",
|
||||
"PORTÉE PAR l’atout structurel que représente un d’assouplissement monétaire qu’elle\n",
|
||||
" mix énergétique parmi les moins menait depuis la crise financière de\n",
|
||||
"UNE REPRISE exposés à la Russie et, d’autre part, 2008. Ainsi, dès juillet 2022, et pour\n",
|
||||
" sur les politiques proactives du la première fois en 10 ans, la BCE a\n",
|
||||
"ÉCONOMIQUE gouvernement avec la mise en place augmenté ses taux directeurs. Les\n",
|
||||
" du bouclier tarifaire, de la remise taux d’emprunts de l’État à 10 ans se\n",
|
||||
"INÉDITE carburant et du chèque énergie. sont ainsi progressivement éloignés\n",
|
||||
"AMORCÉE Ces dispositifs, temporaires, ont de leur territoire négatif pour\n",
|
||||
" été progressivement supprimés : la atteindre 3,10 % en fin d’année.\n",
|
||||
"EN 2021. remise carburant, d’abord prolongée\n",
|
||||
" jusqu’à mi-novembre a pris fin Cette décision s’est également\n",
|
||||
"Le déclenchement de la guerre en en décembre 2022, tandis que le accompagnée de la fin du\n",
|
||||
"Ukraine par la Russie dès février a chèque énergie exceptionnel a pris programme d’achat d’urgence (PEPP)\n",
|
||||
"rebattu les cartes de cet équilibre, fin en mars 2023. mis en place pendant la pandémie,\n",
|
||||
"provoquant des bouleversements suivi de la réduction progressive de\n",
|
||||
"majeurs sur les plans géopolitiques et Le marché du travail français a par son bilan, à un rythme mensuel de 15\n",
|
||||
"économiques, avec le déploiement ailleurs montré toute sa robustesse, milliards d’euros par mois.\n",
|
||||
"de sanctions à l’encontre de la Russie la dynamique de reprise initiée en\n",
|
||||
"et une forte poussée inflationniste. 2021 ainsi que l’effet des réformes L’Agence France Trésor a fait face à ce\n",
|
||||
"Face à cette situation, les principales structurelles engagées les années contexte de grands bouleversements\n",
|
||||
"banques centrales mondiales, dont précédentes permettant au taux géopolitiques, économiques et\n",
|
||||
"la Banque centrale européenne d’emploi des Français âgés de 15 à 64 financiers en s’appuyant sur ses\n",
|
||||
"(BCE), ont engagé une politique de ans d’atteindre fin 2022 un niveau principes de régularité, de prévisibilité\n",
|
||||
"normalisation monétaire rapide de 68,1 %, un record depuis 1975. et de transparence. Cette stratégie\n",
|
||||
"pour lutter contre l’inflation. La reprise économique de début s’est de nouveau révélée robuste et,\n",
|
||||
"Parallèlement, le gouvernement d’année et les effets positifs du plan alliée à l’engagement et à l’efficacité\n",
|
||||
"français a mis en place des mesures France Relance ont permis la création de ses équipes, ainsi qu’à la qualité\n",
|
||||
"(à hauteur de 43,6 milliards d’euros de 337 100 emplois, essentiellement de crédit de la signature de la France,\n",
|
||||
"sur l’année 2022) pour protéger les dans le secteur salarié marchand. Ce lui a permis d’accomplir sa mission\n",
|
||||
"entreprises et les ménages. dynamisme a aussi conduit à la chute de financement de l’action publique\n",
|
||||
" du taux de chômage, atteignant son au bénéfice de tous.\n",
|
||||
"Avec une croissance de +2,5 %, la niveau le plus bas depuis mars 2008\n",
|
||||
"France a illustré une nouvelle fois avec 7,2 % de demandeurs d’emploi\n",
|
||||
" Emmanuel Moulin\n",
|
||||
" DIRECTEUR GÉNÉRAL DU TRÉSOR\n",
|
||||
" ET PRÉSIDENT DE L’AFT\n",
|
||||
" AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022 5\n",
|
||||
" L’économie française en 2022 :\n",
|
||||
"résilience face aux chocs géopolitiques\n",
|
||||
" et économiques\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" LE DÉBUT DE sa résilience économique face aux lors du dernier trimestre de 2022.\n",
|
||||
" L’ANNÉE 2022 chocs. Cette croissance a été permise Malgré un climat des affaires impacté\n",
|
||||
" grâce à une forte demande intérieure par l’inflation, le soutien apporté\n",
|
||||
" SEMBLAIT alimentée par le dynamisme de aux TPE/PME leur a permis de faire\n",
|
||||
" l’investissement et, en dépit de face aux défis énergétiques tout en\n",
|
||||
" ENGAGÉ DANS l’inflation, d’une résilience de la préservant l’emploi.\n",
|
||||
" consommation des ménages sur une\n",
|
||||
" UNE DYNAMIQUE grande partie de l’année. Afin de combattre l’inflation qui a\n",
|
||||
" largement dépassé la cible de 2 %,\n",
|
||||
" EFFICACE DE Le taux d’inflation des prix à la la BCE, de concert avec les banques\n",
|
||||
" SORTIE DE CRISE consommation français est resté l’un centrales des principales économies\n",
|
||||
" des plus bas d’Europe avec +6,0 % développées, a adapté sa fonction de\n",
|
||||
" PORTÉE PAR en 2022, s’appuyant, d’une part, sur réaction en mettant fin aux politiques\n",
|
||||
" l’atout structurel que représente un d’assouplissement monétaire qu’elle\n",
|
||||
" UNE REPRISE mix énergétique parmi les moins menait depuis la crise financière de\n",
|
||||
" exposés à la Russie et, d’autre part, 2008. Ainsi, dès juillet 2022, et pour\n",
|
||||
" ÉCONOMIQUE sur les politiques proactives du la première fois en 10 ans, la BCE a\n",
|
||||
" INÉDITE gouvernement avec la mise en place augmenté ses taux directeurs. Les\n",
|
||||
" du bouclier tarifaire, de la remise taux d’emprunts de l’État à 10 ans se\n",
|
||||
" AMORCÉE carburant et du chèque énergie. sont ainsi progressivement éloignés\n",
|
||||
" Ces dispositifs, temporaires, ont de leur territoire négatif pour\n",
|
||||
" EN 2021. été progressivement supprimés : la atteindre 3,10 % en fin d’année.\n",
|
||||
" remise carburant, d’abord prolongée\n",
|
||||
" jusqu’à mi-novembre a pris fin Cette décision s’est également\n",
|
||||
" Le déclenchement de la guerre en en décembre 2022, tandis que le a c c o m p a g n é e d e l a f i n d u\n",
|
||||
" Ukraine par la Russie dès février a chèque énergie exceptionnel a pris programme d’achat d’urgence (PEPP)\n",
|
||||
" rebattu les cartes de cet équilibre, fin en mars 2023. mis en place pendant la pandémie,\n",
|
||||
" provoquant des bouleversements suivi de la réduction progressive de\n",
|
||||
" majeurs sur les plans géopolitiques et Le marché du travail français a par son bilan, à un rythme mensuel de 15\n",
|
||||
" économiques, avec le déploiement ailleurs montré toute sa robustesse, milliards d’euros par mois.\n",
|
||||
" de sanctions à l’encontre de la Russie la dynamique de reprise initiée en L’Agence France Trésor a fait face à ce\n",
|
||||
" et une forte poussée inflationniste. 2021 ainsi que l’effet des réformes contexte de grands bouleversements\n",
|
||||
" Face à cette situation, les principales structurelles engagées les années géopolitiques, économiques et\n",
|
||||
" banques centrales mondiales, dont précédentes permettant au taux financiers en s’appuyant sur ses\n",
|
||||
" la Banque centrale européenne d’emploi des Français âgés de 15 à 64 principes de régularité, de prévisibilité\n",
|
||||
" (BCE), ont engagé une politique de ans d’atteindre fin 2022 un niveau et de transparence. Cette stratégie\n",
|
||||
" normalisation monétaire rapide de 68,1 %, un record depuis 1975. s’est de nouveau révélée robuste et,\n",
|
||||
" pour lutter contre l’inflation. La reprise économique de début alliée à l’engagement et à l’efficacité\n",
|
||||
" Parallèlement, le gouvernement d’année et les effets positifs du plan de ses équipes, ainsi qu’à la qualité\n",
|
||||
" français a mis en place des mesures France Relance ont permis la création de crédit de la signature de la France,\n",
|
||||
" (à hauteur de 43,6 milliards d’euros de 337 100 emplois, essentiellement lui a permis d’accomplir sa mission\n",
|
||||
" sur l’année 2022) pour protéger les dans le secteur salarié marchand. Ce de financement de l’action publique\n",
|
||||
" entreprises et les ménages. dynamisme a aussi conduit à la chute au bénéfice de tous.\n",
|
||||
" du taux de chômage, atteignant son\n",
|
||||
" Avec une croissance de +2,5 %, la niveau le plus bas depuis mars 2008\n",
|
||||
" France a illustré une nouvelle fois avec 7,2 % de demandeurs d’emploi\n",
|
||||
" Emmanuel Moulin\n",
|
||||
" DIRECTEUR GÉNÉRAL DU TRÉSOR\n",
|
||||
" ET PRÉSIDENT DE L’AFT\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022 5\n",
|
||||
"---\n",
|
||||
" du directeur général Le mot\n",
|
||||
" 011 En 2022, le choc d’inflation\n",
|
||||
" et la normalisation\n",
|
||||
" de la politique monétaire\n",
|
||||
" ont mis fin à une décennie\n",
|
||||
" de taux historiquement bas.\n",
|
||||
"6 AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022\n",
|
||||
" Le mot\n",
|
||||
" du directeur général\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"En 2022, le choc d’inflation\n",
|
||||
" et la normalisation\n",
|
||||
" de la politique monétaire\n",
|
||||
"ont mis fin à une décennie\n",
|
||||
" de taux historiquement bas.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 6 AGENCE FRANCE TRÉSOR - RAPPORT D’ACTIVITÉ 2022\n",
|
||||
"---\n",
|
||||
" MALGRÉ UN CONTEXTE DE MARCHÉ MOUVEMENTÉ ET LES MESURES D’AMPLEUR\n",
|
||||
" PRISES POUR LIMITER L’IMPACT DE L’INFLATION SUR LES MÉNAGES ET\n",
|
||||
" LES ENTREPRISES, LE PROGRAMME DE FINANCEMENT À MOYEN ET LONG TERME\n",
|
||||
" EST DEMEURÉ INCHANGÉ À 260 MILLIARDS D’EUROS, STABLE PAR RAPPORT\n",
|
||||
" À 2021, ET LA DETTE DE COURT TERME A ÉTÉ RÉDUITE DE 7 MILLIARDS D’EUROS.\n",
|
||||
"En janvier 2022, la normalisation de d’obligations indexées sur l’inflation, la dette de court terme a été réduite\n",
|
||||
"la politique monétaire en zone euro sur lequel a été enregistré un de 7 milliards d’euros. En effet, le\n",
|
||||
"était une perspective de moyen supplément d’indexation supérieur dynamisme des recettes fiscales et\n",
|
||||
"terme. Quelques semaines plus tard, de 17 milliards d’euros à celui de la trésorerie levée lors de la crise\n",
|
||||
"l’invasion de l’Ukraine par la Russie l’année 2021. Il s’est également sanit\n"
|
||||
" MALGRÉ UN CONTEXTE DE MARCHÉ MOUVEMENTÉ ET LES MESURES D’AMPLEUR\n",
|
||||
" PRISES POUR LIMITER L’IMPACT DE L’INFLATION SUR LES MÉNAGES ET\n",
|
||||
" LES ENTREPRISES, LE PROGRAMME DE FINANCEMENT À MOYEN ET LONG TERME\n",
|
||||
" EST DEMEURÉ INCHANGÉ À 260 MILLIARDS D’EUROS, STABLE PAR RAPPORT\n",
|
||||
" À 2021, ET LA DETTE DE COURT TERME A ÉTÉ RÉDUITE DE 7 MILLIARDS D’EUROS.\n",
|
||||
"\n",
|
||||
"En janvier 2022, la normalisation de d’obligations indexées sur l’inflation, la dette de court terme a été réduite\n",
|
||||
"la politique monétaire en zone euro sur lequel a été enregistré un de 7 milliards d’euros. En effet, le\n",
|
||||
"était une perspective de moyen supplément d’indexation supérieur dynamisme des recettes fiscales et\n",
|
||||
"terme. Quelques semaines plus tard, de 17 milliards d’euros à celui de la trésorerie levée lors de la crise\n",
|
||||
"l’invasion de l’Ukraine par la Russie l’année 2021. Il s’est également sanitaire ont permis d’absorber le\n",
|
||||
"déclenchait le processus qui allait traduit par une hausse de la demande coût de ces mesures.\n",
|
||||
"mettre fin à une décennie de taux pour ces produits, qui ont représenté\n",
|
||||
"monétaires nuls ou négatifs. Dès près de 10 % du programme de La mise en œuvre des engagements\n",
|
||||
"l’été, la Banque centrale européenne financement. Ceci a notamment pris les années précédentes a\n",
|
||||
"mettait un terme à ses achats nets permis l’émission par syndication, en également mobilisé les équipes\n",
|
||||
"d’actifs et entamait la remontée de janvier, d’une nouvelle OAT indexée de l’AFT en 2022, qui ont émis\n",
|
||||
"ses taux directeurs. Illustration de la sur l’inflation européenne d’une pour le compte de la CADES\n",
|
||||
"rapidité de cette normalisation, le maturité de 30 ans, l’OAT€i 0,10 % 38 milliards d’obligations sociales\n",
|
||||
"taux de rendement des obligations 25 juillet 2053, pour un volume en 2022, permettant à la CADES\n",
|
||||
"assimilables du Trésor (OAT) à 10 ans \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -198,34 +234,7 @@
|
||||
"execution_count": null,
|
||||
"id": "ac332ea3-cfff-4216-b292-62410a26c336",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-02-28 16:41:26-- https://www.dropbox.com/scl/fi/g5ojyzk4m44hl7neut6vc/chinese_pdf.pdf?rlkey=45reu51kjvdvic6zucr8v9sh3&dl=1\n",
|
||||
"Resolving www.dropbox.com (www.dropbox.com)... 162.125.13.18\n",
|
||||
"Connecting to www.dropbox.com (www.dropbox.com)|162.125.13.18|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 302 Found\n",
|
||||
"Location: https://uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com/cd/0/inline/COJ69Wg2e7wH9S0ELzl4j4znoonRSQS-JJrH6mxy_vcrvY-KV7f10kMyQH6IYmtfMh_9xcDNOYnLkWkwMTYItwE1XQB5nqXbjmLJ4jLbDrMeu7-b49m796ctxevwnp7k1_U/file?dl=1# [following]\n",
|
||||
"--2024-02-28 16:41:27-- https://uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com/cd/0/inline/COJ69Wg2e7wH9S0ELzl4j4znoonRSQS-JJrH6mxy_vcrvY-KV7f10kMyQH6IYmtfMh_9xcDNOYnLkWkwMTYItwE1XQB5nqXbjmLJ4jLbDrMeu7-b49m796ctxevwnp7k1_U/file?dl=1\n",
|
||||
"Resolving uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com (uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com)... 162.125.13.15\n",
|
||||
"Connecting to uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com (uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com)|162.125.13.15|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 302 Found\n",
|
||||
"Location: /cd/0/inline2/COKEp-d6ZqzrIIaPRlanov72wwnd7GX5eNSPnsxug0A8pOpek8hO6eFxp84cY3_NMBRsAqtX-IIVPpcfYHNoV__mpu1SsOV8wV8a68DwVKaVJRJriY_KV8lEFocvLgf7c7mhrREbIJ1UBN2fx6S_qWegwVIen1z1-pw-K7icMnA3EKJNqM9DFtqx9ct0FI4vdYGsv8ckLF26WgAhs96k1cHn-VRJle4SKstdYs8EmBxiuFLXZRCL3gljwAsLu3J6WRvis9v7VJ2zNhgrcT-ZnVujlpQGoGWLLPmREKffK608Xfz1XE35DzO28e_mm4SUPRfsP2mvIUrJUtUrhobR4siqQRGojxi0S7-da4Y7fpB4Tw/file?dl=1 [following]\n",
|
||||
"--2024-02-28 16:41:27-- https://uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com/cd/0/inline2/COKEp-d6ZqzrIIaPRlanov72wwnd7GX5eNSPnsxug0A8pOpek8hO6eFxp84cY3_NMBRsAqtX-IIVPpcfYHNoV__mpu1SsOV8wV8a68DwVKaVJRJriY_KV8lEFocvLgf7c7mhrREbIJ1UBN2fx6S_qWegwVIen1z1-pw-K7icMnA3EKJNqM9DFtqx9ct0FI4vdYGsv8ckLF26WgAhs96k1cHn-VRJle4SKstdYs8EmBxiuFLXZRCL3gljwAsLu3J6WRvis9v7VJ2zNhgrcT-ZnVujlpQGoGWLLPmREKffK608Xfz1XE35DzO28e_mm4SUPRfsP2mvIUrJUtUrhobR4siqQRGojxi0S7-da4Y7fpB4Tw/file?dl=1\n",
|
||||
"Reusing existing connection to uc7a03fdb7d960dbedb23e9298ab.dl.dropboxusercontent.com:443.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 8074860 (7.7M) [application/binary]\n",
|
||||
"Saving to: ‘chinese_pdf.pdf’\n",
|
||||
"\n",
|
||||
"chinese_pdf.pdf 100%[===================>] 7.70M 37.9MB/s in 0.2s \n",
|
||||
"\n",
|
||||
"2024-02-28 16:41:28 (37.9 MB/s) - ‘chinese_pdf.pdf’ saved [8074860/8074860]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget \"https://www.dropbox.com/scl/fi/g5ojyzk4m44hl7neut6vc/chinese_pdf.pdf?rlkey=45reu51kjvdvic6zucr8v9sh3&dl=1\" -O chinese_pdf.pdf"
|
||||
]
|
||||
@@ -240,15 +249,24 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 0089f0b6-29ee-4e94-a8bf-49a137666f15\n",
|
||||
".........."
|
||||
"Started parsing the file under job_id bf9e76e8-fa2b-447a-a483-8bda12135c31\n",
|
||||
"."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(language=\"ch_sim\")\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Set the language to Chinese!\n",
|
||||
" language=\"ch_sim\",\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"./chinese_pdf.pdf\")\n",
|
||||
"documents = result.get_text_documents(split_by_page=False)"
|
||||
]
|
||||
@@ -263,167 +281,212 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"中国投资有限责任公司2022年度报告 5\n",
|
||||
" 核心价值观\n",
|
||||
"\n",
|
||||
" 致力于实现国家外汇资金多元化投资,在可接受风险范围内 责任 合力\n",
|
||||
" 实现股东权益最大化,以服务于国家经济发展和深化金融体\n",
|
||||
" 忠于使命、\n",
|
||||
" 勤勉尽责 立足大局、\n",
|
||||
" 制改革的需要 有效协同\n",
|
||||
" 是公司遵奉的核心价值取向 是实现公司可持续发展的关键\n",
|
||||
"\n",
|
||||
" 愿景 专业 进取\n",
|
||||
"\n",
|
||||
" 坚持良好的专业精神和职业操守 求知进取、\n",
|
||||
" 追求卓越\n",
|
||||
" 成为受人尊重的国际一流主权财富基金 是公司成功的基石 是公司成功和发展壮大的内驱力\n",
|
||||
"---\n",
|
||||
"企业文化与核心价值观\n",
|
||||
"使命 核心价值观\n",
|
||||
" 致力于实现国家外汇资金多元化投资,在可接受风险范围内 责任 合力\n",
|
||||
" 实现股东权益最大化,以服务于国家经济发展和深化金融体\n",
|
||||
" 制改革的需要 忠于使命、勤勉尽责 立足大局、有效协同\n",
|
||||
" 是公司遵奉的核心价值取向 是实现公司可持续发展的关键\n",
|
||||
" 愿景 专业 进取\n",
|
||||
" 成为受人尊重的国际一流主权财富基金 坚持良好的专业精神和职业操守 求知进取、追求卓越\n",
|
||||
" 是公司成功的基石 是公司成功和发展壮大的内驱力\n",
|
||||
"01\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 致辞 我们将一以贯之地践行全球发展倡议,\n",
|
||||
" 充分维护投资东道国利益,\n",
|
||||
" 积极投身可持续投资,\n",
|
||||
" 助力世界经济实现更高质量、\n",
|
||||
" 更有韧性的发展。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 3 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 4\n",
|
||||
"---\n",
|
||||
"01 我们将一以贯之地践行全球发展倡议,充分维护投资东道国利益,\n",
|
||||
" 积极投身可持续投资,助力世界经济实现更高质量、更有韧性的发展。\n",
|
||||
" 致 辞\n",
|
||||
" 3 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 4\n",
|
||||
"“行之力则知愈进,知之深则行愈达。”站在新的历史起点上,中投公司\n",
|
||||
"将继续秉承精益求精、追求卓越的专业精神,与国内外合作伙伴一起深化\n",
|
||||
"合作,共聚力量、共迎挑战、共享成果,开启打造世界一流主权财富基金\n",
|
||||
"的新篇章,为助力全球经济发展作出新贡献!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"彭纯\n",
|
||||
"董事长\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"董事长致辞 2022年,是中投公司成立十五周年。\n",
|
||||
" 自2007年成立以来,中投公司坚守长期机构投资者定位,坚持国际化、市场化、专业化、负责任原则,搭\n",
|
||||
"\n",
|
||||
" 建起符合大型国际投资机构特点的治理架构,形成了系统完备的投资管理体系,经受住了国际金融危机、世纪\n",
|
||||
"\n",
|
||||
" 疫情等多个历史罕见的风险与挑战。如今,公司对外投资业务覆盖国际市场主要资产类别以及全球110多个国家\n",
|
||||
" 和地区,培养了一支高素质专业化的投资管理人才队伍,搭建了互利共赢的投资合作“朋友圈”,长期投资收\n",
|
||||
"\n",
|
||||
" 益超越董事会制定的考核目标,为促进国家外汇资产保值增值、服务国内国际双循环作出了积极贡献,在推动\n",
|
||||
"\n",
|
||||
" 全球投资合作、助力世界经济增长中贡献了中投力量,书写了中国主权财富基金不平凡的创业发展史。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"5 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 6\n",
|
||||
"---\n",
|
||||
" “行之力则知愈进,知之深则行愈达。”站在新的历史起点上,中投公司\n",
|
||||
" 将继续秉承精益求精、追求卓越的专业精神,与国内外合作伙伴一起深化\n",
|
||||
" 合作,共聚力量、共迎挑战、共享成果,开启打造世界一流主权财富基金\n",
|
||||
" 的新篇章,为助力全球经济发展作出新贡献! #Ave彭纯\n",
|
||||
" 董事长\n",
|
||||
" 2022年,是中投公司成立十五周年。\n",
|
||||
"董事长致辞 自2007年成立以来,中投公司坚守长期机构投资者定位,坚持国际化、市场化、专业化、负责任原则,搭\n",
|
||||
" 建起符合大型国际投资机构特点的治理架构,形成了系统完备的投资管理体系,经受住了国际金融危机、世纪\n",
|
||||
" 疫情等多个历史罕见的风险与挑战。如今,公司对外投资业务覆盖国际市场主要资产类别以及全球110多个国家\n",
|
||||
" 和地区,培养了一支高素质专业化的投资管理人才队伍,搭建了互利共赢的投资合作“朋友圈”,长期投资收\n",
|
||||
" 益超越董事会制定的考核目标,为促进国家外汇资产保值增值、服务国内国际双循环作出了积极贡献,在推动\n",
|
||||
" 全球投资合作、助力世界经济增长中贡献了中投力量,书写了中国主权财富基金不平凡的创业发展史。\n",
|
||||
"5 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 6\n",
|
||||
"---\n",
|
||||
" 2022年以来,全球地缘政治风险显著攀升,产业链供应链持续调整重构,美欧央行大幅加息,国际资本 我们守正创新,坚决践行双碳与可持续发展理念。更加包容、更加普惠、更有韧性的发展是全球\n",
|
||||
"市场剧烈震荡,MSCI全球股票指数、彭博全球债券指数一度自高点下跌超过22%、13%。面对风高浪急的国 可持续发展的关键。我们积极履行负责任投资者理念,制定《关于践行双碳目标和可持续投资行动的意见》,\n",
|
||||
"际环境和前所未有的巨大挑战,公司保持战略定力,发挥长期机构投资者优势,不断优化资产配置和投资策 积极开展气候变化、能源转型等主题投资。我们发布《运营碳中和行动计划》,明确时间表和路线图,全力实\n",
|
||||
"略,着力提升总组合韧性,加强重点领域风险防控,年度投资收益跑赢大市;截至2022年底,过去十年对外 现节能减排目标。我们探索以绿色资源引领乡村发展的新方法,在四个定点帮扶县持续推进巩固脱贫成果与乡\n",
|
||||
"投资年化净收益率按美元计算为6.43%,超出十年业绩目标26个基点;自成立以来累计年化国有资本增值率达 村振兴的有效衔接,助力民生保障与产业扶持,积极履行企业社会责任。\n",
|
||||
"到12.67%,圆满完成五年战略规划主要目标任务。 面向未来,我们坚信,发展与合作是破解全球性问题的“钥匙”。中投公司将一以贯之地践行全球发展倡\n",
|
||||
" 我们矢志不渝,积极打造世界一流主权财富基金。长期资本对于促进世界经济持续发展有着不 议,秉持互利共赢理念,以资本为纽带,促进国际产业交流合作,推动世界互联互通;充分维护投资东道国利\n",
|
||||
"可替代的作用。我们坚持国际化、市场化、专业化、负责任原则,快速恢复常态化对外交流交往,按照互利共 益,与东道国共创价值、共享价值;积极投身可持续投资,推动被投企业履行社会责任,助力世界经济实现更\n",
|
||||
"赢原则深化与国内外各类机构合作,持续为世界经济发展提供长期资本支持。我们积极创新对外投资方式,稳 高质量、更有韧性的发展。\n",
|
||||
"2022年以来,全球地缘政治风险显著攀升,产业链供应链持续调整重构,美欧央行大幅加息,国际资本 我们守正创新,坚决践行双碳与可持续发展理念。更加包容、更加普惠、更有韧性的发展是全球\n",
|
||||
"\n",
|
||||
"市场剧烈震荡,MSCI全球股票指数、彭博全球债券指数一度自高点下跌超过22%、13%。面对风高浪急的国 可持续发展的关键。我们积极履行负责任投资者理念,制定《关于践行双碳目标和可持续投资行动的意见》,\n",
|
||||
"际环境和前所未有的巨大挑战,公司保持战略定力,发挥长期机构投资者优势,不断优化资产配置和投资策 积极开展气候变化、能源转型等主题投资。我们发布《运营碳中和行动计划》,明确时间表和路线图,全力实\n",
|
||||
"\n",
|
||||
"略,着力提升总组合韧性,加强重点领域风险防控,年度投资收益跑赢大市;截至2022年底,过去十年对外 现节能减排目标。我们探索以绿色资源引领乡村发展的新方法,在四个定点帮扶县持续推进巩固脱贫成果与乡\n",
|
||||
"投资年化净收益率按美元计算为6.43%,超出十年业绩目标26个基点;自成立以来累计年化国有资本增值率达 村振兴的有效衔接,助力民生保障与产业扶持,积极履行企业社会责任。\n",
|
||||
"到12.67%,圆满完成五年战略规划主要目标任务。\n",
|
||||
"\n",
|
||||
" 面向未来,我们坚信,发展与合作是破解全球性问题的“钥匙”。中投公司将一以贯之地践行全球发展倡\n",
|
||||
"我们矢志不渝,积极打造世界一流主权财富基金。长期资本对于促进世界经济持续发展有着不 议,秉持互利共赢理念,以资本为纽带,促进国际产业交流合作,推动世界互联互通;充分维护投资东道国利\n",
|
||||
"\n",
|
||||
"可替代的作用。我们坚持国际化、市场化、专业化、负责任原则,快速恢复常态化对外交流交往,按照互利共 益,与东道国共创价值、共享价值;积极投身可持续投资,推动被投企业履行社会责任,助力世界经济实现更\n",
|
||||
"\n",
|
||||
"赢原则深化与国内外各类机构合作,持续为世界经济发展提供长期资本支持。我们积极创新对外投资方式,稳 高质量、更有韧性的发展。\n",
|
||||
"\n",
|
||||
"健运行多支新型双边基金,新设相关投资合作平台,深入推进中国市场价值创造,促进被投资公司拓展市场空\n",
|
||||
"间,助推国际投资与产业合作高质量发展。 经济全球化的潮流不可阻挡。我们呼吁各国携起手来,做多边主义的坚定维护者,打造更加开放有序的投\n",
|
||||
" 资环境,便利资本和资源要素在全球顺畅流动。我们尊重各方的利益关切,在开放中捕捉投资机遇,以务实合\n",
|
||||
" 我们直面挑战,着力加强自主投资能力建设。面对持续动荡的国际金融市场,我们锚定配置方 作应对共同挑战,并肩前进分享发展红利,推动世界经济平稳运行和持续增长。\n",
|
||||
"\n",
|
||||
"间,助推国际投资与产业合作高质量发展。 经济全球化的潮流不可阻挡。我们呼吁各国携起手来,做多边主义的坚定维护者,打造更加开放有序的投\n",
|
||||
"\n",
|
||||
" 资环境,便利资本和资源要素在全球顺畅流动。我们尊重各方的利益关切,在开放中捕捉投资机遇,以务实合\n",
|
||||
"我们直面挑战,着力加强自主投资能力建设。面对持续动荡的国际金融市场,我们锚定配置方 作应对共同挑战,并肩前进分享发展红利,推动世界经济平稳运行和持续增长。\n",
|
||||
"\n",
|
||||
"向,强化研究驱动,有序实施组合调整、策略优化,及时调整公开市场投资布局,质量并重推进非公开市场投\n",
|
||||
"资,完成另类资产投资占比50%的资产配置目标,对外投资总组合的韧性和质量不断提高。我们持续深化投资 “行之力则知愈进,知之深则行愈达。”过去的十五年,是中投人不惧挑战、接续奋斗的十五\n",
|
||||
"管理体制机制改革,统一非公开市场投资决策制度流程,配强投资决策专职委员并设立支持团队,投资管理科 年。 2023年是中投人落实新一轮战略规划的开局之年。上半年,在风高浪急的国际环境下,中投公司锚定战略目\n",
|
||||
"学化、专业化水平得到进一步提升。 标,统筹好发展和安全,取得了良好业绩,实现了良好开局。近期,公司部分董事更换,我们对离任董事在指导和支\n",
|
||||
" 持公司完善公司治理、深化投资管理体制机制改革、应对国际市场风险挑战等方面所作的贡献表示衷心感谢,对新\n",
|
||||
" 我们勇担使命,坚定走好中国特色金融发展之路。面对新征程新要求,我们坚持发挥“积极股 任董事表示热烈欢迎。站在新的历史起点上,中投公司将完整、准确、全面贯彻新发展理念,积极助力构建新发展格\n",
|
||||
"东”作用,督促控参股金融企业优化产品服务、加大资源倾斜力度,全力支持稳经济稳增长。我们积极创新完 局,牢牢把握高质量发展首要任务,继续秉承精益求精、追求卓越的专业精神,与国内外合作伙伴一起深化合作,共\n",
|
||||
"善“汇金模式”,推动优化国有金融资本布局,以市场化方式参与问题金融机构救助,助力金融市场稳定健康 聚力量、共迎挑战、共享成果,开启打造世界一流主权财富基金的新篇章,为助力全球经济发展作出新贡献!\n",
|
||||
" 50% “ 行 之 力 则 知 愈 进 , 知 之 深 则 行 愈 达 。\n",
|
||||
"资,完成另类资产投资占比 的资产配置目标,对外投资总组合的韧性和质量不断提高。我们持续深化投资 ” 过去的十五年,\n",
|
||||
" 是中投人不惧挑战、\n",
|
||||
" 接续奋斗的十五\n",
|
||||
"管理体制机制改革,统一非公开市场投资决策制度流程,配强投资决策专职委员并设立支持团队,投资管理科 2023年是中投人落实新一轮战略规划的开局之年。\n",
|
||||
" 上半年,\n",
|
||||
" 在风高浪急的国际环境下,\n",
|
||||
" 年。 中投公司锚定战略目\n",
|
||||
"学化、专业化水平得到进一步提升。 标,\n",
|
||||
" 统筹好发展和安全,\n",
|
||||
" 取得了良好业绩,\n",
|
||||
" 实现了良好开局。\n",
|
||||
" 近期,\n",
|
||||
" 公司部分董事更换,\n",
|
||||
" 我们对离任董事在指导和支\n",
|
||||
"\n",
|
||||
" 持公司完善公司治理、\n",
|
||||
" 深化投资管理体制机制改革、\n",
|
||||
" 应对国际市场风险挑战等方面所作的贡献表示衷心感谢,\n",
|
||||
" 对新\n",
|
||||
"我们勇担使命,坚定走好中国特色金融发展之路。面对新征程新要求,我们坚持发挥“积极股 任董事表示热烈欢迎。\n",
|
||||
" 站在新的历史起点上,\n",
|
||||
" 中投公司将完整、\n",
|
||||
" 准确、\n",
|
||||
" 全面贯彻新发展理念,\n",
|
||||
" 积极助力构建新发展格\n",
|
||||
"东”作用,督促控参股金融企业优化产品服务、加大资源倾斜力度,全力支持稳经济稳增长。我们积极创新完 局,\n",
|
||||
" 牢牢把握高质量发展首要任务,\n",
|
||||
" 继续秉承精益求精、\n",
|
||||
" 追求卓越的专业精神,\n",
|
||||
" 与国内外合作伙伴一起深化合作,\n",
|
||||
" 共\n",
|
||||
"善“汇金模式”,推动优化国有金融资本布局,以市场化方式参与问题金融机构救助,助力金融市场稳定健康 聚力量、\n",
|
||||
" 共迎挑战、\n",
|
||||
" 共享成果,\n",
|
||||
" 开启打造世界一流主权财富基金的新篇章,\n",
|
||||
" 为助力全球经济发展作出新贡献!\n",
|
||||
"发展。我们主动适应新形势新要求,围绕国有金融资本管理体系建设等重大课题深入研究,压实派出董事自主\n",
|
||||
"\n",
|
||||
"履职责任,不断提升机构化履职能力。\n",
|
||||
" 我们坚守底线,持续夯实全面风险管理体系。面对风高浪急的国际环境,我们优化风险管理委员\n",
|
||||
"\n",
|
||||
"我们坚守底线,持续夯实全面风险管理体系。面对风高浪急的国际环境,我们优化风险管理委员\n",
|
||||
"\n",
|
||||
"会设置,修订全面风险管理基本制度,增加风险类别的覆盖度,全面提升风险预见、应对、处置水平。在对外投\n",
|
||||
"\n",
|
||||
"资方面,我们严守法律合规底线,健全地缘政治、气候变化等非传统风险防控机制,突出抓好流动性管理,对外\n",
|
||||
"\n",
|
||||
"投资总组合风险保持在董事会规定的容忍度内。在国有金融资本受托管理方面,我们建立健全控参股金融企业风\n",
|
||||
"\n",
|
||||
"险监测体系,全面开展多维度风险画像,推动控参股金融企业风险减存量、控增量、防变量取得积极成效。\n",
|
||||
"7 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 8\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"7 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 8\n",
|
||||
"---\n",
|
||||
"02 中投公司的组建宗旨是实现国家外汇资金多元化投资,在可接受风\n",
|
||||
" 险范围内实现股东权益最大化,以服务于国家宏观经济发展和深化\n",
|
||||
" 公 司 介 绍 金融体制改革的需要。\n",
|
||||
" 9 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 10\n",
|
||||
"02\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 公司介绍 中投公司的组建宗旨是实现国家外汇资金多元化投资,在可接受风\n",
|
||||
" 险范围内实现股东权益最大化,以服务于国家宏观经济发展和深化\n",
|
||||
" 金融体制改革的需要。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 9 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 10\n",
|
||||
"---\n",
|
||||
"公司概况中国投资有限责任公司(以下简称“中投公司”)依照《中华人民共和国公司法》(以下简称“《公司 公司治理 中投公司按照《公司法》及《中国投资有限责任公司章程》(以下简称“《中投公司章程》”)中的有关规\n",
|
||||
"法》”)于2007年9月成立,总部设在北京。中投公司的初始资本金为2000亿美元,由中国财政部发行1.55万 定,设立了董事会、监事会和执行委员会(以下简称“执委会”),三者之间权责明确、独立履职、有效制衡。\n",
|
||||
"亿元人民币特别国债募集。截至2022年底,公司总资产达1.24万亿美元。 2022年,中投公司健全完善董事会、监事会运行机制,强化下设专门委员会的职能发挥,持续提升公司治\n",
|
||||
" 中投公司的组建宗旨是实现国家外汇资金多元化投资,在可接受风险范围内实现股东权益最大化,以服务于 理效能。公司根据业务发展需要,优化调整投资管理架构,完善投资决策和投后管理制度机制,深化全面风险管\n",
|
||||
"国家宏观经济发展和深化金融体制改革的需要。 理体系建设,全面提升机构化投资能力。\n",
|
||||
" 中投公司开展境外投资业务与境内金融机构股权管理工作。其中,境外投资业务由下设子公司⸺中投国际\n",
|
||||
"有限责任公司(以下简称“中投国际”)和中投海外直接投资有限责任公司(以下简称“中投海外”)承担,业\n",
|
||||
"务范围包括公开市场股票和债券投资,对冲基金和多资产,泛行业私募股权和私募信用投资,房地产、基础设\n",
|
||||
"施、资源商品、农业等领域的基金投资与直接投资,以及多双边基金管理等。 组织架构图\n",
|
||||
" 中央汇金投资有限责任公司(以下简称“中央汇金”)作为中投公司的子公司,根据国务院授权,对国有重\n",
|
||||
"点金融企业进行股权投资,以出资额为限代表国家依法对国有重点金融企业行使出资人权利和履行出资人义务。 董事会 监事会\n",
|
||||
"中央汇金不开展商业性经营活动,不干预其控股的国有重点金融企业的日常经营活动。 提名与\n",
|
||||
" 薪酬委员会\n",
|
||||
" 中投国际和中投海外开展的境外业务与中央汇金开展的境内业务之间实行严格的“防火墙”政策和措施。\n",
|
||||
" 战略与\n",
|
||||
" 社会责任\n",
|
||||
" 委员会\n",
|
||||
" 风险管理 执行 国际咨询 监督 审计\n",
|
||||
" 委员会 委员会 委员会 委员会 委员会\n",
|
||||
" 境外投资 管理与支持 境内股权\n",
|
||||
" 业务部门 部门 管理部门\n",
|
||||
"11 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 12\n",
|
||||
"公司概况 公司治理\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 中国投资有限责任公司(以下简称“中投公司”)依照《中华人民共和国公司法》(以下简称“《公司 中投公司按照《公司法》及《中国投资有限责任公司章程》(以下简称“《中投公司章程》”)中的有关规\n",
|
||||
" 法》”)于2007年9月成立,总部设在北京。中投公司的初始资本金为2000亿美元,由中国财政部发行1.55万 定,设立了董事会、监事会和执行委员会(以下简称“执委会”),三者之间权责明确、独立履职、有效制衡。\n",
|
||||
" 亿元人民币特别国债募集。截至2022年底,公司总资产达1.24万亿美元。\n",
|
||||
" 2022年,中投公司健全完善董事会、监事会运行机制,强化下设专门委员会的职能发挥,持续提升公司治\n",
|
||||
" 中投公司的组建宗旨是实现国家外汇资金多元化投资,在可接受风险范围内实现股东权益最大化,以服务于 理效能。公司根据业务发展需要,优化调整投资管理架构,完善投资决策和投后管理制度机制,深化全面风险管\n",
|
||||
" 国家宏观经济发展和深化金融体制改革的需要。 理体系建设,全面提升机构化投资能力。\n",
|
||||
"\n",
|
||||
" 中投公司开展境外投资业务与境内金融机构股权管理工作。其中,境外投资业务由下设子公司⸺中投国际\n",
|
||||
" 有限责任公司(以下简称“中投国际”)和中投海外直接投资有限责任公司(以下简称“中投海外”)承担,业\n",
|
||||
" 务范围包括公开市场股票和债券投资,对冲基金和多资产,泛行业私募股权和私募信用投资,房地产、基础设 组织架构图\n",
|
||||
" 施、资源商品、农业等领域的基金投资与直接投资,以及多双边基金管理等。\n",
|
||||
"\n",
|
||||
" 中央汇金投资有限责任公司(以下简称“中央汇金”)作为中投公司的子公司,根据国务院授权,对国有重\n",
|
||||
" 点金融企业进行股权投资,以出资额为限代表国家依法对国有重点金融企业行使出资人权利和履行出资人义务。 董事会 监事会\n",
|
||||
" 中央汇金不开展商业性经营活动,不干预其控股的国有重点金融企业的日常经营活动。 提名与\n",
|
||||
" 薪酬委员会\n",
|
||||
"\n",
|
||||
" 中投国际和中投海外开展的境外业务与中央汇金开展的境内业务之间实行严格的“防火墙”政策和措施。\n",
|
||||
"\n",
|
||||
" 战略与\n",
|
||||
" 社会责任\n",
|
||||
" 委员会\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"风险管理 执行 国际咨询 监督 审计\n",
|
||||
"委员会 委员会 委员会 委员会 委员会\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 境外投资 管理与支持 境内股权\n",
|
||||
" 业务部门 部门 管理部门\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" 11 中国投资有限责任公司2022年度报告 中国投资有限责任公司2022年度报告 12\n",
|
||||
"---\n",
|
||||
"董事会 沈如军\n",
|
||||
" 党委委员、执行董事、副总经理\n",
|
||||
" 中投公司董事会行使《公司法》和《中投公司章程》中规定的有限责任公司董事会的职权,主要包括:审核 1964年出生,管理学博士,高级会计师。历任中国工商银行计划财务部副总经理、\n",
|
||||
"和批准公司的发展战略、经营方针和投资计划;确定公司需向股东报告的重大事项;制定公司年度预决算方案; 北京市分行副行长、财务会计部总经理、山东省分行行长,交通银行执行董事、副\n",
|
||||
"任免公司高级管理人员;决定或授权批准设立内部管理机构等。 行长。现任本公司党委委员、执行董事、副总经理。\n",
|
||||
" 董事会由执行董事、非执行董事、独立董事以及职工董事构成。 丛亮\n",
|
||||
" 2022年,面对复杂严峻的国际经济形势,董事会加强对公司重大经营管理事项的指导和督促,及时听取投 非执行董事\n",
|
||||
"资形势、经营管理、风险防控等汇报,认真审议经营计划、财务预算和决算、业绩考核等重要议题,深入谋划中 1971年出生,经济学博士。历任国家发展和改革委员会国民经济综合司副司长、司\n",
|
||||
"投公司新一轮战略规划,明确发展目标、基本原则和重点举措,为公司下一阶段改革发展描绘新的蓝图。董事会 长,国家发展和改革委员会秘书长、新闻发言人,国家发展和改革委员会副主任,\n",
|
||||
"专门委员会根据授权,重点关注关系企业长远发展的重大事项,为董事会出谋划策,推动公司高质量发展迈上新 国家粮食和物资储备局局长。现任国家发展和改革委员会副主任,并兼任本公司非\n",
|
||||
"台阶。 执行董事。\n",
|
||||
" 许宏才\n",
|
||||
" 非执行董事\n",
|
||||
"董事会成员 1963年出生,经济学学士。历任财政部预算司副司长、司长,财政部部长助理,财\n",
|
||||
" 政部副部长。现任全国人大财政经济委员会副主任委员、全国人大常委会预算工作\n",
|
||||
" 彭 纯 \n"
|
||||
"董事会 沈如军\n",
|
||||
" 党委委员、\n",
|
||||
" 执行董事、\n",
|
||||
" 副总经理\n",
|
||||
"\n",
|
||||
" 中投公司董事会行使《公司法》和《中投公司章程》中规定的有限责任公司董事会的职权,主要包括:审核 1964年出生,管理学博士,高级会计师。历任中国工商银行计划财务部副总经理、\n",
|
||||
"和批准公司的发展战略、经营方针和投资计划;确定公司需向股东报告的重大事项;制定公司年度预决算方案; 北京市分行副行长、财务会计部总经理、山东省分行行长,交通银行执行董事、副\n",
|
||||
"任免公司高级管理人员;决定或授权批准设立内部管理机构等。 行长。现任本公司党委委员、执行董事、副总经理。\n",
|
||||
"\n",
|
||||
" 董事会由执行董事、非执行董事、独立董事以及职工董事构成。 丛亮\n",
|
||||
"\n",
|
||||
" 2022年,面对复杂严峻的国际经济形势,董事会加强对公司重大经营管理事项的指导和督促,及时听取投 非执行董事\n",
|
||||
"资形势、经营管理、风险防控等汇报,认真审议经营计划、财务预算和决算、业绩考核等重要议题,深入谋划中 1971年出生,经济学博士。历任国家发展和改革委员会国民经济综合司副司长、司\n",
|
||||
"投公司新一轮战略规划,明确发展目标、基本原则和重点举措,为公司下一阶段改革发展描绘新的蓝图。董事会 长,国家发展和改革委员会秘书长、新闻发言人,国家发展和改革委员会副主任,\n",
|
||||
"专门委员会根据授权,重点关注关系企业长远发展的重大事项,为董事会出谋划策,推动公司高质量发展迈上新 国家粮食和物资储备局局长。现任国家发展和改革委员会副主任,并兼任本公司非\n",
|
||||
"台\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(documents[0].get_content()[1000:10000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "640f0679-7f7e-4b0a-a46d-b099ae382fe2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# download another copy with a different name to avoid hitting pdf cache\n",
|
||||
"!wget \"https://www.dropbox.com/scl/fi/g5ojyzk4m44hl7neut6vc/chinese_pdf.pdf?rlkey=45reu51kjvdvic6zucr8v9sh3&dl=1\" -O chinese_pdf2.pdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bfcacf90-ca67-4bfd-b023-be0af2cb18c5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 99538f59-24f7-4f1e-ab27-4081933fa5ee\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"base_parser = LlamaParse(language=\"en\")\n",
|
||||
"result = await base_parser.aparse(\"./chinese_pdf2.pdf\")\n",
|
||||
"base_documents = result.get_text_documents(split_by_page=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b264ed4e-647a-4f51-9f79-fdf82b76762a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(base_documents[0].get_content()[1000:10000])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -13,7 +13,12 @@
|
||||
"\n",
|
||||
"We illustrate the process of using llama-parse to parse a PDF document, then index the document with a MongoDB vector store, and subsequently perform basic queries against this store.\n",
|
||||
"\n",
|
||||
"This notebook is structured similarly to quick start guides, aiming to introduce users to utilizing llama-parse in conjunction with a MongoDB Atlas VectorSearch."
|
||||
"This notebook is structured similarly to quick start guides, aiming to introduce users to utilizing llama-parse in conjunction with a MongoDB Atlas VectorSearch.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -29,8 +34,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-parse\n",
|
||||
"%pip install llama-index-vector-stores-mongodb llama-index-llms-openai"
|
||||
"%pip install llama-cloud-services\n",
|
||||
"%pip install \"llama-index-vector-stores-mongodb>=0.8.0<0.9.0\" \"llama-index>=0.13.0<0.14.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,8 +55,10 @@
|
||||
"\n",
|
||||
"os.environ[\n",
|
||||
" \"LLAMA_CLOUD_API_KEY\"\n",
|
||||
"] = \"\" # Get it from https://cloud.llamaindex.ai/api-key\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\" # Get it from https://platform.openai.com/api-keys"
|
||||
"] = \"llx-...\" # Get it from https://cloud.llamaindex.ai/api-key\n",
|
||||
"os.environ[\n",
|
||||
" \"OPENAI_API_KEY\"\n",
|
||||
"] = \"sk-...\" # Get it from https://platform.openai.com/api-keys"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,6 +77,20 @@
|
||||
"from llama_index.core.node_parser import SentenceSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core import Settings\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"Settings.llm = OpenAI(model=\"gpt-5-mini\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-small\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -127,12 +148,21 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 09a49745-9f21-4190-9de8-27e4e1a4bdf5\n"
|
||||
"Started parsing the file under job_id 993fa45f-f4ed-4d49-9032-794b3470305a\n",
|
||||
"."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = await LlamaParse().aparse(file_path)\n",
|
||||
"result = await LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
").aparse(file_path)\n",
|
||||
"\n",
|
||||
"documents = result.get_text_documents(split_by_page=False)"
|
||||
]
|
||||
},
|
||||
@@ -145,19 +175,25 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"rmer - model architecture.\n",
|
||||
"The Transformer follows this overall architecture using stacked self-attention and point-wise, fully\n",
|
||||
"connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,\n",
|
||||
"respectively.\n",
|
||||
"3.1 Encoder and Decoder Stacks\n",
|
||||
"Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two\n",
|
||||
"sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-\n",
|
||||
"wise fully connected feed-forward network. We employ a residual connection [11] around each of\n",
|
||||
"the two sub-layers, followed by layer normalization [1]. That is, the output of each sub-layer is\n",
|
||||
"LayerNorm(x + Sublayer(x)), where Sublayer(x) is the function implemented by the sub-layer\n",
|
||||
"itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding\n",
|
||||
"layers, produce outputs of dimension dmodel = 512.\n",
|
||||
"Decoder: The decoder is also composed of a stack of N = 6 identical layers. In addition \n"
|
||||
" sub-layer, which performs multi-head\n",
|
||||
"attention over the output of the encoder stack. Similar to the encoder, we employ residual connections\n",
|
||||
"around each of the sub-layers, followed by layer normalization. We also modify the self-attention\n",
|
||||
"sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This\n",
|
||||
"masking, combined with fact that the output embeddings are offset by one position, ensures that the\n",
|
||||
"predictions for position i can depend only on the known outputs at positions less than i.\n",
|
||||
"\n",
|
||||
"3.2 Attention\n",
|
||||
"An attention function can be described as mapping a query and a set of key-value pairs to an output,\n",
|
||||
"where the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n",
|
||||
"\n",
|
||||
" 3\n",
|
||||
"---\n",
|
||||
" Scaled Dot-Product Attention Multi-Head Attention\n",
|
||||
"\n",
|
||||
" Linear\n",
|
||||
" MatMul\n",
|
||||
"\n",
|
||||
" SoftMax \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -180,10 +216,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mongo_uri = os.environ[\"MONGO_URI\"]\n",
|
||||
"mongo_uri = \"<mongodb_uri>\"\n",
|
||||
"\n",
|
||||
"mongodb_client = pymongo.MongoClient(mongo_uri)\n",
|
||||
"mongodb_vector_store = MongoDBAtlasVectorSearch(mongodb_client)"
|
||||
"mongodb_vector_store = MongoDBAtlasVectorSearch(mongodb_client)\n",
|
||||
"\n",
|
||||
"mongodb_vector_store.create_vector_search_index(\n",
|
||||
" dimensions=1536, path=\"embedding\", similarity=\"cosine\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -222,7 +262,6 @@
|
||||
"index = VectorStoreIndex(\n",
|
||||
" nodes=nodes,\n",
|
||||
" storage_context=storage_context,\n",
|
||||
" embed_model=OpenAIEmbedding(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -253,7 +292,7 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"***********New LlamaParse+ Basic Query Engine***********\n",
|
||||
"The BLEU score on the WMT 2014 English-to-German translation task is 28.4.\n"
|
||||
"28.4 BLEU\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -274,39 +313,56 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"We varied the learning\n",
|
||||
"For our big models,(described on the\n",
|
||||
"bottom line of table 3), step time was 1.0 seconds. The big models were trained for 300,000 steps\n",
|
||||
"(3.5 days).\n",
|
||||
"\n",
|
||||
"5.3 Optimizer\n",
|
||||
"\n",
|
||||
"We used the Adam optimizer [20] with β1 = 0.9, β2 = 0.98 and ϵ = 10−9. We varied the learning\n",
|
||||
"rate over the course of training, according to the formula:\n",
|
||||
" lrate = d−0.5 (3)\n",
|
||||
" model · min(step_num−0.5, step_num · warmup_steps−1.5)\n",
|
||||
"\n",
|
||||
" lrate = d−0.5 · min(step_num−0.5, step_num · warmup_steps−1.5) (3)\n",
|
||||
" model\n",
|
||||
"\n",
|
||||
"This corresponds to increasing the learning rate linearly for the first warmup_steps training steps,\n",
|
||||
"and decreasing it thereafter proportionally to the inverse square root of the step number. We used\n",
|
||||
"warmup_steps = 4000.\n",
|
||||
"5.4 Regularization\n",
|
||||
"\n",
|
||||
"5.4 Regularization\n",
|
||||
"\n",
|
||||
"We employ three types of regularization during training:\n",
|
||||
" 7\n",
|
||||
"\n",
|
||||
" 7\n",
|
||||
"---\n",
|
||||
"Table 2: The Transformer achieves better BLEU scores than previous state-of-the-art models on the\n",
|
||||
"English-to-German and English-to-French newstest2014 tests at a fraction of the training cost.\n",
|
||||
" Model BLEU Training Cost (FLOPs)\n",
|
||||
" EN-DE EN-FR EN-DE EN-FR\n",
|
||||
" ByteNet [18] 23.75\n",
|
||||
" Deep-Att + PosUnk [39] 39.2 1.0 · 1020\n",
|
||||
" GNMT + RL [38] 24.6 39.92 2.3 · 1019 1.4 · 1020\n",
|
||||
" ConvS2S [9] 25.16 40.46 9.6 · 1018 1.5 · 1020\n",
|
||||
" MoE [32] 26.03 40.56 2.0 · 1019 1.2 · 1020\n",
|
||||
" Deep-Att + PosUnk Ensemble [39] 40.4 8.0 · 1020\n",
|
||||
" GNMT + RL Ensemble [38] 26.30 41.16 1.8 · 1020 1.1 · 1021\n",
|
||||
" ConvS2S Ensemble [9] 26.36 41.29 7.7 · 1019 1.2 · 1021\n",
|
||||
" Transformer (base model) 27.3 38.1 3.3 · 1018\n",
|
||||
" Transformer (big) 28.4 41.8 2.3 · 1019\n",
|
||||
"Residual Dropout We apply dropout [33] to the output of each sub-layer, before it is added to the\n",
|
||||
"\n",
|
||||
" Model BLEU Training Cost (FLOPs)\n",
|
||||
" EN-DE EN-FR EN-DE EN-FR\n",
|
||||
" ByteNet [18] 23.75\n",
|
||||
" Deep-Att + PosUnk [39] 39.2 1.0 · 1020\n",
|
||||
" GNMT + RL [38] 24.6 39.92 2.3 · 1019 1.4 · 1020\n",
|
||||
" ConvS2S [9] 25.16 40.46 9.6 · 1018 1.5 · 1020\n",
|
||||
" MoE [32] 26.03 40.56 2.0 · 1019 1.2 · 1020\n",
|
||||
" Deep-Att + PosUnk Ensemble [39] 40.4 8.0 · 1020\n",
|
||||
" GNMT + RL Ensemble [38] 26.30 41.16 1.8 · 1020 1.1 · 1021\n",
|
||||
" ConvS2S Ensemble [9] 26.36 41.29 7.7 · 1019 1.2 · 1021\n",
|
||||
" Transformer (base model) 27.3 38.1 3.3 · 1018\n",
|
||||
" Transformer (big) 28.4 41.8 2.3 · 1019\n",
|
||||
"\n",
|
||||
"Residual Dropout We apply dropout [33] to the output of each sub-layer, before it is added to the\n",
|
||||
"sub-layer input and normalized. In addition, we apply dropout to the sums of the embeddings and the\n",
|
||||
"positional encodings in both the encoder and decoder stacks. For the base model, we use a rate of\n",
|
||||
"Pdrop = 0.1.\n",
|
||||
"Label Smoothing During training, we employed label smoothing of value ϵls = 0.1 [36]. This\n",
|
||||
"\n",
|
||||
"Label Smoothing During training, we employed label smoothing of value ϵls = 0.1 [36]. This\n",
|
||||
"hurts perplexity, as the model learns to be more unsure, but improves accuracy and BLEU score.\n",
|
||||
"\n",
|
||||
"6 Results\n",
|
||||
"6.1 Machine Translation\n",
|
||||
"\n",
|
||||
"6.1 Machine Translation\n",
|
||||
"\n",
|
||||
"On the WMT 2014 English-to-German translation task, the big transformer model (Transformer (big)\n",
|
||||
"in Table 2) outperforms the best previously reported models (including ensembles) by more than 2.0\n",
|
||||
"BLEU, establishing a new state-of-the-art BLEU score of 28.4. The configuration of this model is\n",
|
||||
@@ -319,11 +375,7 @@
|
||||
"dropout rate Pdrop = 0.1, instead of 0.3.\n",
|
||||
"For the base models, we used a single model obtained by averaging the last 5 checkpoints, which\n",
|
||||
"were written at 10-minute intervals. For the big models, we averaged the last 20 checkpoints. We\n",
|
||||
"used beam search with a beam size of 4 and length penalty α = 0.6 [38]. These hyperparameters\n",
|
||||
"were chosen after experimentation on the development set. We set the maximum output length during\n",
|
||||
"inference to input length + 50, but terminate early when possible [38].\n",
|
||||
"Table 2 summarizes our results and compares our translation quality and training costs to other model\n",
|
||||
"architectures from the literature.\n"
|
||||
"used beam search with a beam size of 4 and length penalty α = 0.6 [38].\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -338,9 +390,9 @@
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "anthropic_env",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "anthropic_env"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -352,11 +404,6 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,7 +17,12 @@
|
||||
"\n",
|
||||
"This cookbook shows you how to use LlamaParse to parse any document with the multimodal capabilities of Multi-Modal LLMs from Anthropic/ OpenAI.\n",
|
||||
"\n",
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n"
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -35,7 +40,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-cloud-services"
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,7 +63,7 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# API access to llama-cloud\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"<YOUR LLAMACLOUD API KEY>\""
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -101,7 +106,7 @@
|
||||
"id": "1b5d6da6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### With anthropic-sonnet-3.5"
|
||||
"### With anthropic-sonnet-4.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -114,7 +119,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id dd9d5e0f-160e-486a-89a2-6005e5a1c2ac\n"
|
||||
"Started parsing the file under job_id fdbe857e-48d0-4024-ba06-bfead78c4a0c\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -122,13 +127,19 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model_name=\"anthropic-sonnet-3.5\",\n",
|
||||
" target_pages=\"24\"\n",
|
||||
" # invalidate_cache=True\n",
|
||||
" # Enable pure multimodal parsing\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"anthropic-sonnet-4.0\",\n",
|
||||
" # Pass in your own API key optionally\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" target_pages=\"24\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"result = await parser.aparse(\"o1.pdf\")\n",
|
||||
"nodes = result.get_text_nodes(split_by_page=False)"
|
||||
"sonnet_nodes = result.get_markdown_nodes(split_by_page=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -136,9 +147,9 @@
|
||||
"id": "4f3c51b0-7878-48d7-9bc3-02b516500128",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### With GPT-4o\n",
|
||||
"### With GPT-4.1-mini\n",
|
||||
"\n",
|
||||
"For comparison, we will also parse the document using GPT-4o."
|
||||
"For comparison, we will also parse the document using GPT-4.1-mini."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -151,7 +162,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 6a4dea44-4f90-406b-b290-9e98620b1232\n"
|
||||
"Started parsing the file under job_id faab19bf-0810-4437-a1ff-4f6ae36d6ce0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -159,13 +170,19 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser_gpt4o = LlamaParse(\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model=\"openai-gpt4o\",\n",
|
||||
" # Enable pure multimodal parsing\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"openai-gpt-4-1-mini\",\n",
|
||||
" # Pass in your own API key optionally\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" target_pages=\"24\",\n",
|
||||
" # invalidate_cache=True\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"result = await parser_gpt4o.aparse(\"o1.pdf\")\n",
|
||||
"nodes = result.get_markdown_nodes(split_by_page=False)"
|
||||
"gpt_nodes = result.get_markdown_nodes(split_by_page=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -188,28 +205,93 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 25\n",
|
||||
"file_name: o1.pdf\n",
|
||||
"\n",
|
||||
"| Participant_ID | clinical Description Reference |\n",
|
||||
"|-----------------|----------------------------------|\n",
|
||||
"| Attribute | Value | Basic Personal Information: Subject 098_S_0896 is a 72.0-year-old Female who has completed 15 years of education. The ethnicity is Not Hisp/Latino and race is White. Marital status is Married. Initially diagnosed as AD, as of the date 2007-10-24, the final diagnosis was Dementia. |\n",
|
||||
"| Age | 72.0 |\n",
|
||||
"| Sex | Female |\n",
|
||||
"| Education | 15 |\n",
|
||||
"| Race | White | Biomarker Measurements: The subject's genetic profile includes an ApoE4 status of 0.0... |\n",
|
||||
"| DX_bl | AD |\n",
|
||||
"| DX | Dementia |\n",
|
||||
"| ... | ... | Cognitive and Neurofunctional Assessments: The Mini-Mental State Examination score stands at 29.0. The Clinical Dementia Rating, sum of boxes, is 1.0. ADAS 11 and 13 scores are 4.67 and 4.67 respectively, with a score of 1.0 in delayed word recall... |\n",
|
||||
"| APOE4 | 1.0 |\n",
|
||||
"| TAU | 212.5 |\n",
|
||||
"| ... | ... |\n",
|
||||
"| MMSE | 29.0 | Volumetric Data: Under MRI conditions at a field strength of 1.5 Tesla MRI Tesla, using Cross Sectional FreeSurfer (FreeSurfer Version 4.3), the imaging data recorded includes ventricles volume at 54422.0, hippocampus volume at 6677.0, whole brain volume at 1147980.0, entorhinal cortex volume at 2782.0, fusiform gyrus volume at 19432.0, and middle temporal area volume at 24951.0. The intracranial volume measured is 1799580.0.... |\n",
|
||||
"| CDRSB | 0.0 |\n",
|
||||
"| ... | ... |\n",
|
||||
"| FLDSTRENG | 1.5 Tesla MRI |\n",
|
||||
"| Ventricles | 84599 |\n",
|
||||
"| Hippocampus | 5319 |\n",
|
||||
"| ... | ... |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<table>\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>Participant_ID</th>\n",
|
||||
"<th>clinical Description Reference</th>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>Attribute</td>\n",
|
||||
"<td>Value</td>\n",
|
||||
"<td rowspan=\"12\"><strong>Basic Personal Information:</strong> Subject 098_S_0896 is a 72.0-year-old Female who has completed 15 years of education. The ethnicity is Not Hisp/Latino and race is White. Marital status is Married. Initially diagnosed as AD, as of the date 2007-10-24, the final diagnosis was Dementia.<br><br><strong>Biomarker Measurements:</strong> The subject's genetic profile includes an ApoE4 status of 0.0...<br><br><strong>Cognitive and Neurofunctional Assessments:</strong> The Mini-Mental State Examination score stands at 29.0. The Clinical Dementia Rating, sum of boxes, is 1.0. ADAS 11 and 13 scores are 4.67 and 4.67 respectively, with a score of 1.0 in delayed word recall...<br><br><strong>Volumetric Data:</strong> Under MRI conditions at a field strength of 1.5 Tesla MRI Tesla, using Cross-Sectional FreeSurfer (FreeSurfer Version 4.3), the imaging data recorded includes ventricles volume at 54422.0, hippocampus volume at 6717.0, whole brain volume at 1147980.0, entorhinal cortex volume at 2782.0, fusiform gyrus volume at 19432.0, and middle temporal area volume at 24951.0. The intracranial volume measured is 1799580.0....</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Age</td>\n",
|
||||
"<td>72.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Sex</td>\n",
|
||||
"<td>Female</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Education</td>\n",
|
||||
"<td>15</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Race</td>\n",
|
||||
"<td>White</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>DX_bl</td>\n",
|
||||
"<td>AD</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>DX</td>\n",
|
||||
"<td>Dementia</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>...</td>\n",
|
||||
"<td>...</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>APOE4</td>\n",
|
||||
"<td>1.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>TAU</td>\n",
|
||||
"<td>212.5</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>...</td>\n",
|
||||
"<td>...</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>MMSE</td>\n",
|
||||
"<td>29.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>CDRSB</td>\n",
|
||||
"<td>0.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>...</td>\n",
|
||||
"<td>...</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>FLDSTRENG</td>\n",
|
||||
"<td>1.5 Tesla MRI</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Ventricles</td>\n",
|
||||
"<td>84509</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Hippocampus</td>\n",
|
||||
"<td>5319</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>...</td>\n",
|
||||
"<td>...</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"Figure 2: An example of a patient table and its corresponding clinical description.\n",
|
||||
"\n",
|
||||
@@ -217,13 +299,15 @@
|
||||
"\n",
|
||||
"In this section, we selected two primary areas of mathematics: algebra and counting and probability in this section. We chose these two topics because of their heavy reliance on problem-solving skills and their frequent use in assessing logical and abstract thinking [46]. The dataset used in testing is from the MATH dataset [46]. The problems in the dataset cover a wide range of subjects, including Prealgebra, Intermediate Algebra, Algebra, Geometry, Counting and Probability, Number Theory, and Precalculus. Each problem is categorized based on difficulty, ranked from level 1 to 5, according to the Art of Problem Solving (AoPS). The dataset mainly comprises problems from various high school math competitions, including the American Mathematics Competitions (AMC) 10 and 12, as well as the American Invitational Mathematics Examination (AIME), and other similar contests. Each problem comes with detailed reference solutions, allowing for a comprehensive comparison of o1-preview's solutions.\n",
|
||||
"\n",
|
||||
"In addition to evaluating the final answers produced by o1-preview, our analysis delves into the step-by-step reasoning process of the o1-preview's solutions. By comparing o1-preview's solutions with the dataset's solutions, we assess its ability to engage in logical reasoning, handle abstract problem-solving tasks, and apply structured approaches to reach correct answers. This deeper analysis offers insights into o1-preview's overall reasoning capabilities, using mathematics as a reliable indicator for logical and structured thought processes.\n"
|
||||
"In addition to evaluating the final answers produced by o1-preview, our analysis delves into the step-by-step reasoning process of the o1-preview's solutions. By comparing o1-preview's solutions with the dataset's solutions, we assess its ability to engage in logical reasoning, handle abstract problem-solving tasks, and apply structured approaches to reach correct answers. This deeper analysis offers insights into o1-preview's overall reasoning capabilities, using mathematics as a reliable indicator for logical and structured thought processes.\n",
|
||||
"\n",
|
||||
"25\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using Sonnet-3.5\n",
|
||||
"print(nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
"# using Sonnet-4.0\n",
|
||||
"print(sonnet_nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -236,43 +320,106 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 25\n",
|
||||
"file_name: o1.pdf\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Participant_ID | clinical Description Reference |\n",
|
||||
"|----------------|--------------------------------|\n",
|
||||
"| **Attribute** | **Value** |\n",
|
||||
"| Age | 72.0 |\n",
|
||||
"| Sex | Female |\n",
|
||||
"| Education | 15 |\n",
|
||||
"| Race | White |\n",
|
||||
"| DX_bl | AD |\n",
|
||||
"| DX | Dementia |\n",
|
||||
"| ... | ... |\n",
|
||||
"| APOE4 | 1.0 |\n",
|
||||
"| TAU | 212.5 |\n",
|
||||
"| ... | ... |\n",
|
||||
"| MMSE | 29.0 |\n",
|
||||
"| CDRSB | 0.0 |\n",
|
||||
"| ... | ... |\n",
|
||||
"| FLDSTRENG | 1.5 Tesla MRI |\n",
|
||||
"| Ventricles | 84599 |\n",
|
||||
"| Hippocampus | 5319 |\n",
|
||||
"| ... | ... |\n",
|
||||
"\n",
|
||||
"**Basic Personal Information:** Subject 098_S_0896 is a 72.0-year-old Female who has completed 15 years of education. The ethnicity is Not Hisp/Latino and race is White. Marital status is Married. Initially diagnosed as AD, as of the date 2007-10-24, the final diagnosis was Dementia.\n",
|
||||
"\n",
|
||||
"**Biomarker Measurements:** The subject's genetic profile includes an ApoE4 status of 0.0...\n",
|
||||
"\n",
|
||||
"**Cognitive and Neurofunctional Assessments:** The Mini-Mental State Examination score stands at 29.0. The Clinical Dementia Rating, sum of boxes, is 1.0. ADAS 11 and 13 scores are 4.67 and 4.67 respectively, with a score of 1.0 in delayed word recall...\n",
|
||||
"\n",
|
||||
"**Volumetric Data:** Under MRI conditions at a field strength of 1.5 Tesla MRI Tesla, using Cross-Sectional FreeSurfer (FreeSurfer Version 4.3), the imaging data recorded includes ventricles volume at 84422.0, hippocampus volume at 6677.0, whole brain volume at 1147980.0, entorhinal cortex volume at 27820.0, fusiform gyrus volume at 19432.0, and middle temporal area volume at 24951.0. The intracranial volume measured is 1799580.0...\n",
|
||||
"<table>\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th colspan=\"2\"><b>Participant_ID</b></th>\n",
|
||||
"<th rowspan=\"2\" style=\"background-color: #b0b0b0;\"><b>clinical Description Reference</b></th>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td><b>Attribute</b></td>\n",
|
||||
"<td><b>Value</b></td>\n",
|
||||
"<td rowspan=\"17\" style=\"background-color: #d0d0d0; vertical-align: top;\">\n",
|
||||
"<b>Basic Personal Information:</b> Subject 098_S_0896 is a 72.0-year-old Female who has completed 15 years of education. The ethnicity is Not Hisp/Latino and race is White. Marital status is Married. Initially diagnosed as AD, as of the date 2007-10-24, the final diagnosis was Dementia.<br><br>\n",
|
||||
"<b>Biomarker Measurements:</b> The subject's genetic profile includes an ApoE4 status of 0.0…<br><br>\n",
|
||||
"<b>Cognitive and Neurofunctional Assessments:</b> The Mini-Mental State Examination score stands at 29.0. The Clinical Dementia Rating, sum of boxes, is 1.0. ADAS 11 and 13 scores are 4.67 and 4.67 respectively, with a score of 1.0 in delayed word recall…<br><br>\n",
|
||||
"<b>Volumetric Data:</b> Under MRI conditions at a field strength of 1.5 Tesla MRI Tesla, using Cross-Sectional FreeSurfer (FreeSurfer Version 4.3), the imaging data recorded includes ventricles volume at 54422.0, hippocampus volume at 6677.0, whole brain volume at 1147980.0, entorhinal cortex volume at 2782.0, fusiform gyrus volume at 19432.0, and middle temporal area volume at 24951.0. The intracranial volume measured is 1799580.0.… \n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td rowspan=\"7\"><b>Basic Personal information</b></td>\n",
|
||||
"<td>Age</td>\n",
|
||||
"<td>72.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Sex</td>\n",
|
||||
"<td>Female</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Education</td>\n",
|
||||
"<td>15</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Race</td>\n",
|
||||
"<td>White</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>DX_bl</td>\n",
|
||||
"<td>AD</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>DX</td>\n",
|
||||
"<td>Dementia</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>…</td>\n",
|
||||
"<td>…</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td rowspan=\"3\"><b>Biomarker measurements</b></td>\n",
|
||||
"<td>APOE4</td>\n",
|
||||
"<td>1.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>TAU</td>\n",
|
||||
"<td>212.5</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>…</td>\n",
|
||||
"<td>…</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td rowspan=\"3\"><b>Cognitive and neurofunctional Assessments</b></td>\n",
|
||||
"<td>MMSE</td>\n",
|
||||
"<td>29.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>CDRSB</td>\n",
|
||||
"<td>0.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>…</td>\n",
|
||||
"<td>…</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td rowspan=\"4\"><b>Volumetric data</b></td>\n",
|
||||
"<td>FLDSTRENG</td>\n",
|
||||
"<td>1.5 Tesla MRI</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Ventricles</td>\n",
|
||||
"<td>84599</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Hippocampus</td>\n",
|
||||
"<td>5319</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>…</td>\n",
|
||||
"<td>…</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"Figure 2: An example of a patient table and its corresponding clinical description.\n",
|
||||
"\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"Skills. Mathematics, as a highly structured and logic-driven discipline, provides an ideal testing ground for evaluating this reasoning ability. To investigate o1-preview’s performance, we designed a series of tests covering various difficulty levels. We begin with high school-level math competition problems in this section, followed by college-level mathematics problems in the next section, allowing us to observe the model’s logical reasoning across varying levels of complexity.\n",
|
||||
"skills. Mathematics, as a highly structured and logic-driven discipline, provides an ideal testing ground for evaluating this reasoning ability. To investigate o1-preview’s performance, we designed a series of tests covering various difficulty levels. We begin with high school-level math competition problems in this section, followed by college-level mathematics problems in the next section, allowing us to observe the model’s logical reasoning across varying levels of complexity.\n",
|
||||
"\n",
|
||||
"In this section, we selected two primary areas of mathematics: algebra and counting and probability in this section. We chose these two topics because of their heavy reliance on problem-solving skills and their frequent use in assessing logical and abstract thinking [46]. The dataset used in testing is from the MATH dataset [46]. The problems in the dataset cover a wide range of subjects, including Prealgebra, Intermediate Algebra, Algebra, Geometry, Counting and Probability, Number Theory, and Precalculus. Each problem is categorized based on difficulty, ranked from level 1 to 5, according to the Art of Problem Solving (AoPS). The dataset mainly comprises problems from various high school math competitions, including the American Mathematics Competitions (AMC) 10 and 12, as well as the American Invitational Mathematics Examination (AIME), and other similar contests. Each problem comes with detailed reference solutions, allowing for a comprehensive comparison of o1-preview’s solutions.\n",
|
||||
"\n",
|
||||
@@ -282,7 +429,7 @@
|
||||
],
|
||||
"source": [
|
||||
"# using GPT-4.1-mini\n",
|
||||
"print(nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
"print(gpt_nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -291,9 +438,9 @@
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "llamacloud",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llamacloud"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -13,7 +13,12 @@
|
||||
"source": [
|
||||
"# Parse Selected Pages \n",
|
||||
"\n",
|
||||
"In this notebook we will demonstrate how to parse selected pages in a document using LlamaParse."
|
||||
"In this notebook we will demonstrate how to parse selected pages in a document using LlamaParse.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -22,7 +27,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Here we install `llama-parse` used for parsing the document"
|
||||
"Here we install `llama-cloud-services` and use `LlamaParse` for parsing the document."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,7 +55,7 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# API access to llama-cloud\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"<YOUR LLAMACLOUD API KEY>\""
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -89,17 +94,26 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id ad1087c1-b085-4dc7-9aa8-d13cdd440f2b\n"
|
||||
"Started parsing the file under job_id d9d7ecc9-766c-48c6-92a8-17432d34818a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(target_pages=\"0,1,2\")\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" # target pages allows for a few formats: 1,2,3 or 1-3 or 1,3,5-7, etc.\n",
|
||||
" target_pages=\"0,1,2\",\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"results = await parser.aparse(\"./uber_2021.pdf\")\n",
|
||||
"documents = results.get_text_documents(split_by_page=True)"
|
||||
"documents = results.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,9 +124,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id_='d0b34f4a-27ef-48e2-a92a-386e5e265f4c', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\\n', text='# UNITED STATES SECURITIES AND EXCHANGE COMMISSION\\n\\n# Washington, D.C. 20549\\n\\n# FORM 10-K\\n\\n(Mark One)\\n\\n☒ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\\n\\nFor the fiscal year ended December 31, 2021\\n\\nOR\\n\\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\\n\\nFor the transition period from _____ to _____\\n\\nCommission File Number: 001-38902\\n\\n# UBER TECHNOLOGIES, INC.\\n\\n(Exact name of registrant as specified in its charter)\\n\\nDelaware\\n\\n45-2647441\\n\\n(State or other jurisdiction of incorporation or organization) (I.R.S. Employer Identification No.)\\n\\n1515 3rd Street\\n\\nSan Francisco, California 94158\\n\\n(Address of principal executive offices, including zip code)\\n\\n(415) 612-8582\\n\\n(Registrant’s telephone number, including area code)\\n\\n# Securities registered pursuant to Section 12(b) of the Act:\\n\\n|Title of each class|Trading Symbol(s)|Name of each exchange on which registered|\\n|---|---|---|\\n|Common Stock, par value $0.00001 per share|UBER|New York Stock Exchange|\\n\\nSecurities registered pursuant to Section 12(g) of the Act: None\\n\\nIndicate by check mark whether the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act. Yes ☒ No ☐\\n\\nIndicate by check mark whether the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. 
Yes ☐ No ☒\\n\\nIndicate by check mark whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding 12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the past 90 days. Yes ☒ No ☐\\n\\nIndicate by check mark whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T (§232.405 of this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files). Yes ☒ No ☐\\n\\nIndicate by check mark whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company, or an emerging growth company. See the definitions of “large accelerated filer,” “accelerated filer,” “smaller reporting company,” and “emerging growth company” in Rule 12b-2 of the Exchange Act.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\\n', text_template='{metadata_str}\\n\\n{content}'),\n",
|
||||
" Document(id_='253b1141-a260-466e-b164-b39df67ef799', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\\n', text=\"# Large accelerated filer\\n\\n☒\\n\\n# Accelerated filer\\n\\n☐\\n\\n# Non-accelerated filer\\n\\n☐\\n\\n# Smaller reporting company\\n\\n☐\\n\\n# Emerging growth company\\n\\n☐\\n\\nIf an emerging growth company, indicate by check mark if the registrant has elected not to use the extended transition period for complying with any new or revised financial accounting standards provided pursuant to Section 13(a) of the Exchange Act.\\n\\n☐\\n\\nIndicate by check mark whether the registrant has filed a report on and attestation to its management’s assessment of the effectiveness of its internal control over financial reporting under Section 404(b) of the Sarbanes-Oxley Act (15 U.S.C. 7262(b)) by the registered public accounting firm that prepared or issued\\n\\n☒\\n\\nIndicate by check mark whether the registrant is a shell company (as defined in Rule 12b-2 of the Exchange Act). Yes\\n\\n☐\\n\\nNo\\n\\n☒\\n\\nThe aggregate market value of the voting and non-voting common equity held by non-affiliates of the registrant as of June 30, 2021, the last business day of the registrant's most recently completed second fiscal quarter, was approximately $90.5 billion based upon the closing price reported for such date on the New York Stock Exchange.\\n\\nThe number of shares of the registrant's common stock outstanding as of February 22, 2022 was 1,954,464,088.\\n\\n# DOCUMENTS INCORPORATED BY REFERENCE\\n\\nPortions of the registrant’s Definitive Proxy Statement relating to the Annual Meeting of Stockholders are incorporated by reference into Part III of this Annual Report on Form 10-K where indicated. 
Such Definitive Proxy Statement will be filed with the Securities and Exchange Commission within 120 days after the end of the registrant’s fiscal year ended December 31, 2021.\", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\\n', text_template='{metadata_str}\\n\\n{content}'),\n",
|
||||
" Document(id_='ad988239-3ab5-498d-85ba-a29241db24d4', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\\n', text='# UBER TECHNOLOGIES, INC.\\n\\n# TABLE OF CONTENTS\\n\\n|Special Note Regarding Forward-Looking Statements|2|\\n|---|---|\\n|PART I|PART I|\\n|Item 1. Business|4|\\n|Item 1A. Risk Factors|11|\\n|Item 1B. Unresolved Staff Comments|46|\\n|Item 2. Properties|46|\\n|Item 3. Legal Proceedings|46|\\n|Item 4. Mine Safety Disclosures|47|\\n|PART II|PART II|\\n|Item 5. Market for Registrant’s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities|47|\\n|Item 6. [Reserved]|48|\\n|Item 7. Management’s Discussion and Analysis of Financial Condition and Results of Operations|48|\\n|Item 7A. Quantitative and Qualitative Disclosures About Market Risk|69|\\n|Item 8. Financial Statements and Supplementary Data|70|\\n|Item 9. Changes in and Disagreements with Accountants on Accounting and Financial Disclosure|146|\\n|Item 9A. Controls and Procedures|147|\\n|Item 9B. Other Information|147|\\n|Item 9C. Disclosure Regarding Foreign Jurisdictions that Prevent Inspections|147|\\n|PART III|PART III|\\n|Item 10. Directors, Executive Officers and Corporate Governance|147|\\n|Item 11. Executive Compensation|147|\\n|Item 12. Security Ownership of Certain Beneficial Owners and Management and Related Stockholder Matters|148|\\n|Item 13. Certain Relationships and Related Transactions, and Director Independence|148|\\n|Item 14. Principal Accounting Fees and Services|148|\\n|PART IV|PART IV|\\n|Item 15. Exhibits, Financial Statement Schedules|148|\\n|Item 16. Form 10-K Summary|148|\\n|Exhibit Index|149|\\n|Signatures|152|', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\\n', text_template='{metadata_str}\\n\\n{content}')]"
|
||||
"'\\n# UNITED STATES \\n## SECURITIES AND EXCHANGE COMMISSION \\nWashington, D.C. 20549 \\n____________________________________________ \\n# FORM 10-K \\n____________________________________________ \\n\\n(Mark One) \\n\\n[x] **ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934** \\nFor the fiscal year ended December 31, 2021 \\nOR \\n[ ] **TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934** \\nFor the transition period from_____ to _____ \\nCommission File Number: 001-38902 \\n____________________________________________ \\n\\n# UBER TECHNOLOGIES, INC. \\n\\n(Exact name of registrant as specified in its charter) \\n____________________________________________ \\n\\nDelaware | 45-2647441 \\n(State or other jurisdiction of incorporation or organization) | (I.R.S. Employer Identification No.) \\n\\n1515 3rd Street \\nSan Francisco, California 94158 \\n(Address of principal executive offices, including zip code) \\n\\n(415) 612-8582 \\n(Registrant’s telephone number, including area code) \\n____________________________________________ \\n\\nSecurities registered pursuant to Section 12(b) of the Act: \\n\\n<table>\\n<thead>\\n<tr>\\n<th>Title of each class</th>\\n<th>Trading Symbol(s)</th>\\n<th>Name of each exchange on which registered</th>\\n</tr>\\n</thead>\\n<tbody>\\n<tr>\\n<td>Common Stock, par value $0.00001 per share</td>\\n<td>UBER</td>\\n<td>New York Stock Exchange</td>\\n</tr>\\n</tbody>\\n</table>\\n\\nSecurities registered pursuant to Section 12(g) of the Act: None \\n\\n* Indicate by check mark whether the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act. \\n - Yes [x] \\n - No [ ] \\n\\n* Indicate by check mark whether the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. 
\\n - Yes [ ] \\n - No [x] \\n\\n* Indicate by check mark whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding 12 months '"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -121,15 +133,35 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents"
|
||||
"documents[0].text[:2000]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(documents)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llamacloud",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llamacloud"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -6,7 +6,12 @@
|
||||
"source": [
|
||||
"# Table Extraction with LlamaParse\n",
|
||||
"\n",
|
||||
"This notebook will show you how to extract tables and save them as CSV files thanks to LlamaParse advanced parsing capabilities."
|
||||
"This notebook will show you how to extract tables and save them as CSV files thanks to LlamaParse's advanced parsing capabilities.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -22,7 +27,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install llama-cloud-services pandas"
|
||||
"%pip install llama-cloud-services pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -36,20 +41,11 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"LLAMA_CLOUD_API_KEY: ··········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = getpass(\"LLAMA_CLOUD_API_KEY: \")"
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,7 +63,14 @@
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(result_type=\"markdown\")"
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,16 +91,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2025-07-16 16:20:41-- https://assets.accessible-digital-documents.com/uploads/2017/01/sample-tables.pdf\n",
|
||||
"Resolving assets.accessible-digital-documents.com (assets.accessible-digital-documents.com)... 3.166.135.2, 3.166.135.62, 3.166.135.51, ...\n",
|
||||
"Connecting to assets.accessible-digital-documents.com (assets.accessible-digital-documents.com)|3.166.135.2|:443... connected.\n",
|
||||
"--2025-08-19 16:05:55-- https://assets.accessible-digital-documents.com/uploads/2017/01/sample-tables.pdf\n",
|
||||
"Resolving assets.accessible-digital-documents.com (assets.accessible-digital-documents.com)... 18.64.67.96, 18.64.67.90, 18.64.67.78, ...\n",
|
||||
"Connecting to assets.accessible-digital-documents.com (assets.accessible-digital-documents.com)|18.64.67.96|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 145494 (142K) [application/pdf]\n",
|
||||
"Saving to: ‘sample-tables.pdf’\n",
|
||||
"\n",
|
||||
"sample-tables.pdf 100%[===================>] 142.08K --.-KB/s in 0.04s \n",
|
||||
"sample-tables.pdf 100%[===================>] 142.08K 529KB/s in 0.3s \n",
|
||||
"\n",
|
||||
"2025-07-16 16:20:41 (3.72 MB/s) - ‘sample-tables.pdf’ saved [145494/145494]\n",
|
||||
"2025-08-19 16:05:57 (529 KB/s) - ‘sample-tables.pdf’ saved [145494/145494]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
@@ -122,12 +125,12 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id b53949f7-9017-4b6a-b30c-be6227271ed2\n"
|
||||
"Started parsing the file under job_id 727ce176-96bd-4cd1-84e3-fb64e08de336\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"json_result = parser.get_json_result(\"sample-tables.pdf\")"
|
||||
"result = await parser.aparse(\"sample-tables.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -141,9 +144,23 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['Rainfall (inches)', 'Americas', 'Asia', 'Europe', 'Africa'], ['', '133', '244', '155', '166'], ['', '27', '28', '29', '20'], ['', '11', '12', '13', '16']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tables = parser.get_tables(json_result, \"tables/\")"
|
||||
"tables = []\n",
|
||||
"for page in result.pages:\n",
|
||||
" for item in page.items:\n",
|
||||
" if item.type == \"table\":\n",
|
||||
" tables.append(item.rows)\n",
|
||||
"\n",
|
||||
"print(tables[8])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -162,14 +179,8 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.google.colaboratory.intrinsic+json": {
|
||||
"summary": "{\n \"name\": \"display(df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Rainfall\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Average\",\n \"\",\n \"24 hour high\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Americas\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 908,\n \"min\": 9,\n \"max\": 2010,\n \"num_unique_values\": 8,\n \"samples\": [\n 104,\n 133,\n 2010\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Asia\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"\",\n 201.0,\n 28.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Europe\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"\",\n 193.0,\n 29.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Africa\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"\",\n 144.0,\n 20.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
|
||||
"type": "dataframe"
|
||||
},
|
||||
"text/html": [
|
||||
"\n",
|
||||
" <div id=\"df-94a74c8f-1062-4a80-8d3f-32f0fbadf7bb\" class=\"colab-df-container\">\n",
|
||||
" <div>\n",
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
@@ -187,315 +198,69 @@
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Rainfall</th>\n",
|
||||
" <th>Americas</th>\n",
|
||||
" <th>Asia</th>\n",
|
||||
" <th>Europe</th>\n",
|
||||
" <th>Africa</th>\n",
|
||||
" <th>0</th>\n",
|
||||
" <th>1</th>\n",
|
||||
" <th>2</th>\n",
|
||||
" <th>3</th>\n",
|
||||
" <th>4</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>(inches)</td>\n",
|
||||
" <td>2010</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>Rainfall (inches)</td>\n",
|
||||
" <td>Americas</td>\n",
|
||||
" <td>Asia</td>\n",
|
||||
" <td>Europe</td>\n",
|
||||
" <td>Africa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Average</td>\n",
|
||||
" <td>104</td>\n",
|
||||
" <td>201.0</td>\n",
|
||||
" <td>193.0</td>\n",
|
||||
" <td>144.0</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>133</td>\n",
|
||||
" <td>244</td>\n",
|
||||
" <td>155</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>24 hour high</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>26.0</td>\n",
|
||||
" <td>27.0</td>\n",
|
||||
" <td>18.0</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td>27</td>\n",
|
||||
" <td>28</td>\n",
|
||||
" <td>29</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>12 hour high</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>11.0</td>\n",
|
||||
" <td>12.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td></td>\n",
|
||||
" <td>2009</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>Average</td>\n",
|
||||
" <td>133</td>\n",
|
||||
" <td>244.0</td>\n",
|
||||
" <td>155.0</td>\n",
|
||||
" <td>166.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>24 hour high</td>\n",
|
||||
" <td>27</td>\n",
|
||||
" <td>28.0</td>\n",
|
||||
" <td>29.0</td>\n",
|
||||
" <td>20.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>12 hour high</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" <td>12.0</td>\n",
|
||||
" <td>13.0</td>\n",
|
||||
" <td>16.0</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>16</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
" <div class=\"colab-df-buttons\">\n",
|
||||
"\n",
|
||||
" <div class=\"colab-df-container\">\n",
|
||||
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-94a74c8f-1062-4a80-8d3f-32f0fbadf7bb')\"\n",
|
||||
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||||
" style=\"display:none;\">\n",
|
||||
"\n",
|
||||
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
||||
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
||||
" </svg>\n",
|
||||
" </button>\n",
|
||||
"\n",
|
||||
" <style>\n",
|
||||
" .colab-df-container {\n",
|
||||
" display:flex;\n",
|
||||
" gap: 12px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-convert {\n",
|
||||
" background-color: #E8F0FE;\n",
|
||||
" border: none;\n",
|
||||
" border-radius: 50%;\n",
|
||||
" cursor: pointer;\n",
|
||||
" display: none;\n",
|
||||
" fill: #1967D2;\n",
|
||||
" height: 32px;\n",
|
||||
" padding: 0 0 0 0;\n",
|
||||
" width: 32px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-convert:hover {\n",
|
||||
" background-color: #E2EBFA;\n",
|
||||
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||||
" fill: #174EA6;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-buttons div {\n",
|
||||
" margin-bottom: 4px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" [theme=dark] .colab-df-convert {\n",
|
||||
" background-color: #3B4455;\n",
|
||||
" fill: #D2E3FC;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" [theme=dark] .colab-df-convert:hover {\n",
|
||||
" background-color: #434B5C;\n",
|
||||
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||||
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||||
" fill: #FFFFFF;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
"\n",
|
||||
" <script>\n",
|
||||
" const buttonEl =\n",
|
||||
" document.querySelector('#df-94a74c8f-1062-4a80-8d3f-32f0fbadf7bb button.colab-df-convert');\n",
|
||||
" buttonEl.style.display =\n",
|
||||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||||
"\n",
|
||||
" async function convertToInteractive(key) {\n",
|
||||
" const element = document.querySelector('#df-94a74c8f-1062-4a80-8d3f-32f0fbadf7bb');\n",
|
||||
" const dataTable =\n",
|
||||
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||||
" [key], {});\n",
|
||||
" if (!dataTable) return;\n",
|
||||
"\n",
|
||||
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||||
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||||
" + ' to learn more about interactive tables.';\n",
|
||||
" element.innerHTML = '';\n",
|
||||
" dataTable['output_type'] = 'display_data';\n",
|
||||
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||||
" const docLink = document.createElement('div');\n",
|
||||
" docLink.innerHTML = docLinkHtml;\n",
|
||||
" element.appendChild(docLink);\n",
|
||||
" }\n",
|
||||
" </script>\n",
|
||||
" </div>\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" <div id=\"df-54b2aa43-838b-47d3-9209-2fb18153cf87\">\n",
|
||||
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-54b2aa43-838b-47d3-9209-2fb18153cf87')\"\n",
|
||||
" title=\"Suggest charts\"\n",
|
||||
" style=\"display:none;\">\n",
|
||||
"\n",
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||||
" width=\"24px\">\n",
|
||||
" <g>\n",
|
||||
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
||||
" </g>\n",
|
||||
"</svg>\n",
|
||||
" </button>\n",
|
||||
"\n",
|
||||
"<style>\n",
|
||||
" .colab-df-quickchart {\n",
|
||||
" --bg-color: #E8F0FE;\n",
|
||||
" --fill-color: #1967D2;\n",
|
||||
" --hover-bg-color: #E2EBFA;\n",
|
||||
" --hover-fill-color: #174EA6;\n",
|
||||
" --disabled-fill-color: #AAA;\n",
|
||||
" --disabled-bg-color: #DDD;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" [theme=dark] .colab-df-quickchart {\n",
|
||||
" --bg-color: #3B4455;\n",
|
||||
" --fill-color: #D2E3FC;\n",
|
||||
" --hover-bg-color: #434B5C;\n",
|
||||
" --hover-fill-color: #FFFFFF;\n",
|
||||
" --disabled-bg-color: #3B4455;\n",
|
||||
" --disabled-fill-color: #666;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-quickchart {\n",
|
||||
" background-color: var(--bg-color);\n",
|
||||
" border: none;\n",
|
||||
" border-radius: 50%;\n",
|
||||
" cursor: pointer;\n",
|
||||
" display: none;\n",
|
||||
" fill: var(--fill-color);\n",
|
||||
" height: 32px;\n",
|
||||
" padding: 0;\n",
|
||||
" width: 32px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-quickchart:hover {\n",
|
||||
" background-color: var(--hover-bg-color);\n",
|
||||
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||||
" fill: var(--button-hover-fill-color);\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-quickchart-complete:disabled,\n",
|
||||
" .colab-df-quickchart-complete:disabled:hover {\n",
|
||||
" background-color: var(--disabled-bg-color);\n",
|
||||
" fill: var(--disabled-fill-color);\n",
|
||||
" box-shadow: none;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-spinner {\n",
|
||||
" border: 2px solid var(--fill-color);\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-bottom-color: var(--fill-color);\n",
|
||||
" animation:\n",
|
||||
" spin 1s steps(1) infinite;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" @keyframes spin {\n",
|
||||
" 0% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-bottom-color: var(--fill-color);\n",
|
||||
" border-left-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 20% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-left-color: var(--fill-color);\n",
|
||||
" border-top-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 30% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-left-color: var(--fill-color);\n",
|
||||
" border-top-color: var(--fill-color);\n",
|
||||
" border-right-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 40% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-right-color: var(--fill-color);\n",
|
||||
" border-top-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 60% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-right-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 80% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-right-color: var(--fill-color);\n",
|
||||
" border-bottom-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" 90% {\n",
|
||||
" border-color: transparent;\n",
|
||||
" border-bottom-color: var(--fill-color);\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"\n",
|
||||
" <script>\n",
|
||||
" async function quickchart(key) {\n",
|
||||
" const quickchartButtonEl =\n",
|
||||
" document.querySelector('#' + key + ' button');\n",
|
||||
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
||||
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
||||
" try {\n",
|
||||
" const charts = await google.colab.kernel.invokeFunction(\n",
|
||||
" 'suggestCharts', [key], {});\n",
|
||||
" } catch (error) {\n",
|
||||
" console.error('Error during call to suggestCharts:', error);\n",
|
||||
" }\n",
|
||||
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
||||
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
||||
" }\n",
|
||||
" (() => {\n",
|
||||
" let quickchartButtonEl =\n",
|
||||
" document.querySelector('#df-54b2aa43-838b-47d3-9209-2fb18153cf87 button');\n",
|
||||
" quickchartButtonEl.style.display =\n",
|
||||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||||
" })();\n",
|
||||
" </script>\n",
|
||||
" </div>\n",
|
||||
"\n",
|
||||
" </div>\n",
|
||||
" </div>\n"
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Rainfall Americas Asia Europe Africa\n",
|
||||
"0 (inches) 2010 \n",
|
||||
"1 Average 104 201.0 193.0 144.0\n",
|
||||
"2 24 hour high 15 26.0 27.0 18.0\n",
|
||||
"3 12 hour high 9 10.0 11.0 12.0\n",
|
||||
"4 2009 \n",
|
||||
"5 Average 133 244.0 155.0 166.0\n",
|
||||
"6 24 hour high 27 28.0 29.0 20.0\n",
|
||||
"7 12 hour high 11 12.0 13.0 16.0"
|
||||
" 0 1 2 3 4\n",
|
||||
"0 Rainfall (inches) Americas Asia Europe Africa\n",
|
||||
"1 133 244 155 166\n",
|
||||
"2 27 28 29 20\n",
|
||||
"3 11 12 13 16"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from IPython.display import display\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\n",
|
||||
" \"/content/tables/table_2025_16_07_16_30_01_569.csv\",\n",
|
||||
")\n",
|
||||
"display(df.fillna(\"\"))"
|
||||
"df = pd.DataFrame(tables[8])\n",
|
||||
"df.head()"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -504,11 +269,20 @@
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
+130
-344
@@ -10,7 +10,11 @@
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/excel/dcf_rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"This notebook constructs a RAG pipeline over a simple DCF template [here](https://eqvista.com/app/uploads/2020/09/Eqvista_DCF-Excel-Template.xlsx).\n",
|
||||
"\n"
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -20,7 +24,7 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"We first setup and load the data. If you haven't already, [download the template](https://eqvista.com/app/uploads/2020/09/Eqvista_DCF-Excel-Template.xlsx) and name it `dcf_template.xlxs` locally."
|
||||
"We first set up and load the data. If you haven't already, [download the template](https://eqvista.com/wp-content/uploads/2020/09/Eqvista_DCF-Excel-Template.xlsx) and name it `dcf_template.xlsx` locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -30,32 +34,21 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index\n",
|
||||
"%pip install \"llama-index>=0.13.0,<0.14.0\"\n",
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "103c7983-56d3-45be-b763-d1828d07c43e",
|
||||
"id": "9876ae6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b694b56-e04b-4d87-aa37-f0725d6b3adb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"# api_key = \"llx-\" # get from cloud.llamaindex.ai"
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -68,18 +61,25 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id cac11eca-d5da-4d46-90e6-321f40e11611\n",
|
||||
"Started parsing the file under job_id cac11eca-5450-4847-9da0-fa6879c4cf3a\n"
|
||||
"Started parsing the file under job_id 1adabb9a-31d3-4732-962f-a287d5f7af2a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" # api_key=api_key, # can also be set in your env as LLAMA_CLOUD_API_KEY\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
"    api_key=\"llx-...\",\n",
|
||||
")\n",
|
||||
"docs = parser.load_data(\"./dcf_template.xlsx\")\n",
|
||||
"# docs_txt = LlamaParse(result_type=\"text\").load_data(\"./dcf_template.xlsx\")"
|
||||
"\n",
|
||||
"result = await parser.aparse(\"./dcf_template.xlsx\")\n",
|
||||
"llama_parse_documents = result.get_text_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,77 +92,51 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# Cover Page\n",
|
||||
"\n",
|
||||
"|Thank you for downloading our DCF Model excel template. This DCF Model excel template helps you to value your business using Discounted Free Cash Flow or DCF Method. | |\n",
|
||||
"|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
|
||||
"| | |\n",
|
||||
"| |Eqvista is an equity management software that allows companies, investors and company shareholders to track, manage, and make intelligent decisions about their companies’ equity.|\n",
|
||||
"| | |\n",
|
||||
"| |GET STARTED- IT'S FREE |\n",
|
||||
"| | |\n",
|
||||
"| |Note: This template is not professional advice and not a substitute for professional advice. |\n",
|
||||
"|Accordingly, before taking any actions based upon such information, we encourage you to consult with the appropriate professionals. | |\n",
|
||||
"| | |\n",
|
||||
"| |@Eqvista Inc. All Rights Reserved |\n",
|
||||
"---\n",
|
||||
"# DCF Model\n",
|
||||
"\n",
|
||||
"|Discounted Cash Flow Excel Template | | | | | | | | | | | |\n",
|
||||
"|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------|-----------|-----------|-----------------------|-----------|-----------------------|--------------|-----------|-----------|-----------|--------------|\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|Here is a simple discounted cash flow excel template for estimating your company value based on this income valuation approach | | | | | | | | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|Instructions: | | | | | | | | | | | |\n",
|
||||
"|1) Fill out the two assumptions in yellow highlight | | | | | | | | | | | |\n",
|
||||
"|2) Fill in either the 5 year or 3 year weighted average figures in yellow highlight | | | | | | | | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|Assumptions | | | | | | | | | | | |\n",
|
||||
"|Tax Rate |20% | | | | | | | | | | |\n",
|
||||
"|Discount Rate |15% | | | | | | | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|5 Year Weighted Moving Average | | | | | | | | | | | |\n",
|
||||
"|Indication of Company Value |$242,995.43 | | | | | | | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|3 Year Weighted Moving Average | | | | | | | | | | | |\n",
|
||||
"|Indication of Company Value |$158,651.07 | | | | | | | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"| |5 Year Weighted Moving Average| | | | | | | | | | |\n",
|
||||
"| |Past Years | | | | |Forecasted Future Years| | | | | |\n",
|
||||
"| |Year 1 |Year 2 |Year 3 |Year 4 |Year 5 |Year 6 |Year 7 |Year 8 |Year 9 |Year 10 |Terminal Value|\n",
|
||||
"|Pre-tax income |50,000.00 |55,000.00 |45,000.00 |52,000.00 |60,000.00 | | | | | | |\n",
|
||||
"|Income Taxes |10,000.00 |11,000.00 |9,000.00 |10,400.00 |12,000.00 | | | | | | |\n",
|
||||
"|Net Income |40,000.00 |44,000.00 |36,000.00 |41,600.00 |48,000.00 | | | | | | |\n",
|
||||
"|Depreciation Expense |5,000.00 |4,000.00 |3,000.00 |2,000.00 |1,000.00 | | | | | | |\n",
|
||||
"|Capital Expenditures |10,000.00 |8,000.00 |5,000.00 |5,000.00 |7,000.00 | | | | | | |\n",
|
||||
"|Debt Repayments |5,000.00 |5,000.00 |5,000.00 |5,000.00 |5,000.00 | | | | | | |\n",
|
||||
"|Net Cash Flow |20,000.00 |27,000.00 |23,000.00 |29,600.00 |35,000.00 |29,093.33 |29,817.78 |30,177.48 |30,469.23 |30,379.74 |287,188.00 |\n",
|
||||
"|Discounting Factor | | | | | |0.8696 |0.7561 |0.6575 |0.5718 |0.4972 |0.4972 |\n",
|
||||
"|Present Value of Future Cash Flow | | | | | |25,298.55 |22,546.52 |19,842.18 |17,420.88 |15,104.10 |142,783.19 |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"| |3 Year Weighted Moving Average| | | | | | | | | | |\n",
|
||||
"| |Past Years | | |Forecasted Future Years| | | | | | | |\n",
|
||||
"| |Year 1 |Year 2 |Year 3 |Year 4 |Year 5 |Year 6 |Terminal Value| | | | |\n",
|
||||
"|Pre-tax income |50,000.00 |55,000.00 |45,000.00 | | | | | | | | |\n",
|
||||
"|Income Taxes |10,000.00 |11,000.00 |9,000.00 | | | | | | | | |\n",
|
||||
"|Net Income |40,000.00 |44,000.00 |36,000.00 | | | | | | | | |\n",
|
||||
"|Depreciation Expense |5,000.00 |4,000.00 |3,000.00 | | | | | | | | |\n",
|
||||
"|Capital Expenditures |10,000.00 |8,000.00 |5,000.00 | | | | | | | | |\n",
|
||||
"|Debt Repayments |5,000.00 |5,000.00 |5,000.00 | | | | | | | | |\n",
|
||||
"|Net Cash Flow |20,000.00 |27,000.00 |23,000.00 |23,833.33 |24,083.33 |23,819.44 |158,253.59 | | | | |\n",
|
||||
"|Discounting Factor | | | |0.8696 |0.7561 |0.6575 |0.6575 | | | | |\n",
|
||||
"|Present Value of Future Cash Flow | | | |20,724.64 |18,210.46 |15,661.67 |104,054.30 | | | | |\n",
|
||||
"| | | | | | | | | | | | |\n",
|
||||
"|Notes: | | | | | | | | | | | |\n",
|
||||
"|-We based this simple discounted cash flow excel model based on the weighted moving averages (5 year or 3 year) for simplicity, in case a constant growth rate cannot be easily determined.| | | | | | | | | | | |\n",
|
||||
"|-The factors such as Depreciation Expense, Capital Expense and Debt Repayments remain constant, so consider this when looking at the forecasted figures. | | | | | | | | | | | |\n",
|
||||
"|-For the terminal value constant growth rate, we make the assumption of the growth from the last forecasted year compared to the first forecasted year. Adjust in the formula as needed. | | | | | | | | | | | |\n",
|
||||
"\n"
|
||||
"Discounted Cash Flow Excel Template\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Here is a simple discounted cash flow excel template for estimating your company value based on this income valuation approach\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Instructions:\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"1) Fill out the two assumptions in yellow highlight\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"2) Fill in either the 5 year or 3 year weighted average figures in yellow highlight\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Assumptions\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Tax Rate\t20%\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Discount Rate\t15%\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"5 Year Weighted Moving Average\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Indication of Company Value\t $242,995.43 \t\t\t\t\t\t\t\t\t\t\n",
|
||||
"3 Year Weighted Moving Average\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"Indication of Company Value\t $158,651.07 \t\t\t\t\t\t\t\t\t\t\n",
|
||||
"\t5 Year Weighted Moving Average\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"\tPast Years\t\t\t\t\tForecasted Future Years\t\t\t\t\t\n",
|
||||
"\tYear 1\tYear 2\tYear 3\tYear 4\tYear 5\tYear 6\tYear 7\tYear 8\tYear 9\tYear 10\tTerminal Value\n",
|
||||
"Pre-tax income\t 50,000.00 \t 55,000.00 \t 45,000.00 \t 52,000.00 \t 60,000.00 \t\t\t\t\t\t\n",
|
||||
"Income Taxes\t 10,000.00 \t 11,000.00 \t 9,000.00 \t 10,400.00 \t 12,000.00 \t\t\t\t\t\t\n",
|
||||
"Net Income\t 40,000.00 \t 44,000.00 \t 36,000.00 \t 41,600.00 \t 48,000.00 \t\t\t\t\t\t\n",
|
||||
"Depreciation Expense\t 5,000.00 \t 4,000.00 \t 3,000.00 \t 2,000.00 \t 1,000.00 \t\t\t\t\t\t\n",
|
||||
"Capital Expenditures\t 10,000.00 \t 8,000.00 \t 5,000.00 \t 5,000.00 \t 7,000.00 \t\t\t\t\t\t\n",
|
||||
"Debt Repayments\t 5,000.00 \t 5,000.00 \t 5,000.00 \t 5,000.00 \t 5,000.00 \t\t\t\t\t\t\n",
|
||||
"Net Cash Flow\t 20,000.00 \t 27,000.00 \t 23,000.00 \t 29,600.00 \t 35,000.00 \t 29,093.33 \t 29,817.78 \t 30,177.48 \t 30,469.23 \t 30,379.74 \t 287,188.00 \n",
|
||||
"Discounting Factor\t\t\t\t\t\t 0.8696 \t 0.7561 \t 0.6575 \t 0.5718 \t 0.4972 \t 0.4972 \n",
|
||||
"Present Value of Future Cash Flow\t\t\t\t\t\t 25,298.55 \t 22,546.52 \t 19,842.18 \t 17,420.88 \t 15,104.10 \t 142,783.19 \n",
|
||||
"\t3 Year Weighted Moving Average\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"\tPast Years\t\t\tForecasted Future Years\t\t\t\t\t\t\t\n",
|
||||
"\tYear 1\tYear 2\tYear 3\tYear 4\tYear 5\tYear 6\tTerminal Value\t\t\t\t\n",
|
||||
"Pre-tax income\t 50,000.00 \t 55,000.00 \t 45,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Income Taxes\t 10,000.00 \t 11,000.00 \t 9,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Net Income\t 40,000.00 \t 44,000.00 \t 36,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Depreciation Expense\t 5,000.00 \t 4,000.00 \t 3,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Capital Expenditures\t 10,000.00 \t 8,000.00 \t 5,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Debt Repayments\t 5,000.00 \t 5,000.00 \t 5,000.00 \t\t\t\t\t\t\t\t\n",
|
||||
"Net Cash Flow\t 20,000.00 \t 27,000.00 \t 23,000.00 \t 23,833.33 \t 24,083.33 \t 23,819.44 \t 158,253.59 \t\t\t\t\n",
|
||||
"Discounting Factor\t\t\t\t 0.8696 \t 0.7561 \t 0.6575 \t 0.6575 \t\t\t\t\n",
|
||||
"Present Value of Future Cash Flow\t\t\t\t 20,724.64 \t 18,210.46 \t 15,661.67 \t 104,054.30 \t\t\t\t\n",
|
||||
"Notes:\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"-We based this simple discounted cash flow excel model based on the weighted moving averages (5 year or 3 year) for simplicity, in case a constant growth rate cannot be easily determined.\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"-The factors such as Depreciation Expense, Capital Expense and Debt Repayments remain constant, so consider this when looking at the forecasted figures.\t\t\t\t\t\t\t\t\t\t\t\n",
|
||||
"-For the terminal value constant growth rate, we make the assumption of the growth from the last forecasted year compared to the first forecasted year. Adjust in the formula as needed.\t\t\t\t\t\t\t\t\t\t\t\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].get_content())"
|
||||
"print(llama_parse_documents[1].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,9 +144,9 @@
|
||||
"id": "1aedd4bb-7939-4fbc-8f07-d362e24d9772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure LLM, Setup Basic Summary Engine\n",
|
||||
"## Configure LLM\n",
|
||||
"\n",
|
||||
"We setup a basic summary engine which retrieves the entire document as context to put into the prompt."
|
||||
"We configure the LLM to use the OpenAI API to answer questions based on the parsed data."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -183,162 +157,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.core import Settings\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-4-turbo-preview\")\n",
|
||||
"Settings.llm = llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0fa2630-ee1b-4ce7-91e9-f9ffff8347f9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core import SummaryIndex\n",
|
||||
"\n",
|
||||
"index = SummaryIndex.from_documents(docs)\n",
|
||||
"# index = SummaryIndex.from_documents(docs_txt)\n",
|
||||
"\n",
|
||||
"query_engine = index.as_query_engine()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1d39a075-46b8-4dcb-8aee-abd10343bedd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define Baseline\n",
|
||||
"\n",
|
||||
"Let's define a baseline query engine over this data, using a naive parser (our PandasExcelReader, available on LlamaHub)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "632f918e-7811-4931-8a5f-4aa4850718db",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Collecting openpyxl\n",
|
||||
" Downloading openpyxl-3.1.3-py2.py3-none-any.whl (251 kB)\n",
|
||||
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.3/251.3 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
|
||||
"\u001b[?25hCollecting et-xmlfile\n",
|
||||
" Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n",
|
||||
"Installing collected packages: et-xmlfile, openpyxl\n",
|
||||
"Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.3\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install llama-index-readers-file\n",
|
||||
"!pip install openpyxl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85ff09fd-8a99-4aa4-8182-8d0cf30f7b85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.readers.file import PandasExcelReader\n",
|
||||
"import importlib\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"base_reader = PandasExcelReader()\n",
|
||||
"base_docs = base_reader.load_data(Path(\"dcf_template.xlsx\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ba45f806-58be-4f57-bf42-2721555136cb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Discounted Cash Flow Excel Template \n",
|
||||
" \n",
|
||||
"Here is a simple discounted cash flow excel template for estimating your company value based on this income valuation approach \n",
|
||||
" \n",
|
||||
"Instructions: \n",
|
||||
"1) Fill out the two assumptions in yellow highlight \n",
|
||||
"2) Fill in either the 5 year or 3 year weighted average figures in yellow highlight \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
"Assumptions \n",
|
||||
"Tax Rate 0.2 \n",
|
||||
"Discount Rate 0.15 \n",
|
||||
" \n",
|
||||
"5 Year Weighted Moving Average \n",
|
||||
"Indication of Company Value 242995.4347636059 \n",
|
||||
" \n",
|
||||
"3 Year Weighted Moving Average \n",
|
||||
"Indication of Company Value 158651.0723286644 \n",
|
||||
" \n",
|
||||
" 5 Year Weighted Moving Average \n",
|
||||
" Past Years Forecasted Future Years \n",
|
||||
" Year 1 Year 2 Year 3 Year 4 Year 5 Year 6 Year 7 Year 8 Year 9 Year 10 Terminal Value\n",
|
||||
"Pre-tax income 50000 55000 45000 52000 60000 \n",
|
||||
"Income Taxes 10000 11000 9000 10400 12000 \n",
|
||||
"Net Income 40000 44000 36000 41600 48000 \n",
|
||||
"Depreciation Expense 5000 4000 3000 2000 1000 \n",
|
||||
"Capital Expenditures 10000 8000 5000 5000 7000 \n",
|
||||
"Debt Repayments 5000 5000 5000 5000 5000 \n",
|
||||
"Net Cash Flow 20000 27000 23000 29600 35000 29093.333333333332 29817.777777777774 30177.481481481478 30469.234567901232 30379.73991769547 287188.0007003137\n",
|
||||
"Discounting Factor 0.8695652173913044 0.7561436672967865 0.6575162324319883 0.5717532455930334 0.4971767352982899 0.4971767352982899\n",
|
||||
"Present Value of Future Cash Flow 25298.550724637684 22546.523839529513 19842.183927989798 17420.883754932976 15104.099911490972 142783.19260502496\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" 3 Year Weighted Moving Average \n",
|
||||
" Past Years Forecasted Future Years \n",
|
||||
" Year 1 Year 2 Year 3 Year 4 Year 5 Year 6 Terminal Value \n",
|
||||
"Pre-tax income 50000 55000 45000 \n",
|
||||
"Income Taxes 10000 11000 9000 \n",
|
||||
"Net Income 40000 44000 36000 \n",
|
||||
"Depreciation Expense 5000 4000 3000 \n",
|
||||
"Capital Expenditures 10000 8000 5000 \n",
|
||||
"Debt Repayments 5000 5000 5000 \n",
|
||||
"Net Cash Flow 20000 27000 23000 23833.333333333332 24083.333333333332 23819.44444444444 158253.58851674633 \n",
|
||||
"Discounting Factor 0.8695652173913044 0.7561436672967865 0.6575162324319883 0.6575162324319883 \n",
|
||||
"Present Value of Future Cash Flow 20724.63768115942 18210.459987397608 15661.671369734164 104054.30329037321 \n",
|
||||
" \n",
|
||||
" \n",
|
||||
"Notes: \n",
|
||||
"-We based this simple discounted cash flow excel model based on the weighted moving averages (5 year or 3 year) for simplicity, in case a constant growth rate cannot be easily determined. \n",
|
||||
"-The factors such as Depreciation Expense, Capital Expense and Debt Repayments remain constant, so consider this when looking at the forecasted figures. \n",
|
||||
"-For the terminal value constant growth rate, we make the assumption of the growth from the last forecasted year compared to the first forecasted year. Adjust in the formula as needed. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(base_docs[1].get_content())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff6e812f-fa94-4b0f-8907-ee70983e53f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core import SummaryIndex\n",
|
||||
"\n",
|
||||
"base_index = SummaryIndex.from_documents([base_docs[1]])\n",
|
||||
"\n",
|
||||
"base_query_engine = base_index.as_query_engine()"
|
||||
"llm = OpenAI(model=\"gpt-5-mini\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -348,7 +168,9 @@
|
||||
"source": [
|
||||
"## Ask Questions over this Data\n",
|
||||
"\n",
|
||||
"Let's now ask questions over this data, using both the LlamaParse-powered pipeline and naive pipeline."
|
||||
"Let's now ask questions over this data, using both the LlamaParse-powered pipeline and naive pipeline.\n",
|
||||
"\n",
|
||||
"LlamaParse-powered responses:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -356,45 +178,42 @@
|
||||
"execution_count": null,
|
||||
"id": "a875a20e-a6b6-46b7-80d4-614546215ffc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_str = \"Tell me about the income taxes in the past years (year 3-5) for the 5 year WMA table\"\n",
|
||||
"response = query_engine.query(query_str)\n",
|
||||
"base_response = base_query_engine.query(query_str)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "06b0b072-f159-47c4-9cad-9f0cc0d56b28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-19 19:35:11,505 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"******* LlamaParse RAG *******\n",
|
||||
"The income taxes in the past years (year 3 to 5) for the 5-year Weighted Moving Average table were $9,000.00 in Year 3, $10,400.00 in Year 4, and $12,000.00 in Year 5.\n",
|
||||
"******* Naive RAG *******\n",
|
||||
"The income taxes in the past years (year 3-5) for the 5 year WMA table were $9,000, $10,400, and $12,000, respectively.\n"
|
||||
"In the 5-year WMA table, income taxes for past years (Year 3–Year 5) are:\n",
|
||||
"\n",
|
||||
"- Year 3: $9,000 \n",
|
||||
"- Year 4: $10,400 \n",
|
||||
"- Year 5: $12,000\n",
|
||||
"\n",
|
||||
"These equal 20% of pre-tax income for those years (pre-tax: $45,000; $52,000; $60,000). The taxes rise steadily: Year 3 → Year 4 is about a 15.6% increase, Year 4 → Year 5 about a 15.4% increase, and Year 3 → Year 5 is a 33.3% increase.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"******* LlamaParse RAG *******\")\n",
|
||||
"print(str(response))\n",
|
||||
"print(\"******* Naive RAG *******\")\n",
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8bd0998f-4f7f-46f9-9b51-cfb510f384ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(response.source_nodes[0].get_content())"
|
||||
"from llama_index.core.llms import ChatMessage\n",
|
||||
"\n",
|
||||
"query_str = \"Tell me about the income taxes in the past years (year 3-5) for the 5 year WMA table\"\n",
|
||||
"context = \"\\n\\n\".join([doc.text for doc in llama_parse_documents])\n",
|
||||
"messages = [\n",
|
||||
" ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" content=f\"Here is some context\\n<context>{context}</context>\\n\\nAnswer the following question: {query_str}\",\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = await llm.achat(messages)\n",
|
||||
"print(response.message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -402,79 +221,46 @@
|
||||
"execution_count": null,
|
||||
"id": "7a93af5f-fcea-4f14-80eb-5dfad230cd8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-08-19 19:36:38,456 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"For the 3‑year WMA the discount factor used in Year 5 is 0.7561.\n",
|
||||
"\n",
|
||||
"Why: the model uses a 15% discount rate (assumption). Because Years 1–3 are historical, Year 4 is discounted one period, Year 5 two periods, etc. So the Year‑5 factor = 1 / (1 + 0.15)^2 = 0.756143 (rounded to 0.7561).\n",
|
||||
"\n",
|
||||
"How it’s used: Year‑5 net cash flow 24,083.33 × 0.7561 = 18,210.46 (present value shown in the template).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query_str = \"Tell me about the discounting factors in year 5 for the 3 year WMA\"\n",
|
||||
"response = query_engine.query(query_str)\n",
|
||||
"base_response = base_query_engine.query(query_str)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c6d3a5fb-c32c-4dea-8f2e-956af85456a4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"******* LlamaParse RAG *******\n",
|
||||
"The discounting factor in year 5 for the 3-year Weighted Moving Average (WMA) is 0.7561.\n",
|
||||
"******* Naive RAG *******\n",
|
||||
"The discounting factor in year 5 for the 3-year Weighted Moving Average is 0.6575162324319883.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"******* LlamaParse RAG *******\")\n",
|
||||
"print(str(response))\n",
|
||||
"print(\"******* Naive RAG *******\")\n",
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b96f3a9b-6e99-4192-b6d6-447319d3c4fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_str = \"Tell me about the projected net cash flow in years 7-9 for the 5 year WMA\"\n",
|
||||
"response = query_engine.query(query_str)\n",
|
||||
"base_response = base_query_engine.query(query_str)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "92b419b9-25ee-4d69-98d9-56c0a45b24af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"******* LlamaParse RAG *******\n",
|
||||
"The projected net cash flow for years 7 to 9 in the 5-year Weighted Moving Average scenario is as follows: Year 7 is $29,817.78, Year 8 is $30,177.48, and Year 9 is $30,469.23.\n",
|
||||
"******* Naive RAG *******\n",
|
||||
"The projected net cash flow for years 7 to 9 in the 5-year weighted moving average scenario is as follows: Year 7 is $29,093.33, Year 8 is $29,817.78, and Year 9 is $30,177.48.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"******* LlamaParse RAG *******\")\n",
|
||||
"print(str(response))\n",
|
||||
"print(\"******* Naive RAG *******\")\n",
|
||||
"print(str(base_response))"
|
||||
"context = \"\\n\\n\".join([doc.text for doc in llama_parse_documents])\n",
|
||||
"messages = [\n",
|
||||
" ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" content=f\"Here is some context\\n<context>{context}</context>\\n\\nAnswer the following question: {query_str}\",\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = await llm.achat(messages)\n",
|
||||
"print(response.message.content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -20,7 +20,12 @@
|
||||
"When interacting with our enterprise customers, we've identified two prominent types of queries. Let's check how they perform with the o1 models:\n",
|
||||
"\n",
|
||||
"1. Queries requesting exact values.\n",
|
||||
"2. Queries using the greater than/less than (>/ <) operators."
|
||||
"2. Queries using the greater than/less than (>/ <) operators.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Before Feb 2025 | N/A | Deprecated |"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -9,7 +9,12 @@
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/knowledge_graphs/kg_agent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"Here we build a knowledge graph agent over the SF 2023 Budget Proposal. We use LlamaIndex abstractions to construct a knowledge graph, and we store the property graph in neo4j. We then build an agent that can interact with the knowledge graph as a tool."
|
||||
"Here we build a knowledge graph agent over the SF 2023 Budget Proposal. We use LlamaIndex abstractions to construct a knowledge graph, and we store the property graph in neo4j. We then build an agent that can interact with the knowledge graph as a tool.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Before Feb 2025 | N/A | Deprecated |"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,13 +5,28 @@
|
||||
"id": "97c79c38-38a3-40f3-ba2e-250649347d63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Multimodal Parsing using Anthropic Claude (Sonnet 3.5)\n",
|
||||
"# Multimodal Parsing using Anthropic Claude (Sonnet 4.0)\n",
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/multimodal/claude_parse.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"This cookbook shows you how to use LlamaParse to parse any document with the multimodal capabilities of Sonnet 3.5. \n",
|
||||
"This cookbook shows you how to use LlamaParse to parse any document with the multimodal capabilities of Sonnet 4.0. \n",
|
||||
"\n",
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n"
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "22db7a9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-cloud-services \"llama-index>=0.13.0,<0.14.0\" \"llama-index-llms-anthropic>=0.8.4,<0.9.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -31,40 +46,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91a9e532-1454-40e0-bbf0-fd442c350121",
|
||||
"id": "0d9fb0aa-74cd-476f-8161-efd9e04248bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d9fb0aa-74cd-476f-8161-efd9e04248bf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2024-07-11 23:44:38-- https://arxiv.org/pdf/2307.09288\n",
|
||||
"Resolving arxiv.org (arxiv.org)... 151.101.195.42, 151.101.131.42, 151.101.3.42, ...\n",
|
||||
"Connecting to arxiv.org (arxiv.org)|151.101.195.42|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 13661300 (13M) [application/pdf]\n",
|
||||
"Saving to: ‘data/llama2.pdf’\n",
|
||||
"\n",
|
||||
"data/llama2.pdf 100%[===================>] 13.03M 69.3MB/s in 0.2s \n",
|
||||
"\n",
|
||||
"2024-07-11 23:44:38 (69.3 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!mkdir -p data\n",
|
||||
"!wget \"https://arxiv.org/pdf/2307.09288\" -O data/llama2.pdf\n",
|
||||
"!wget \"https://www.dropbox.com/scl/fi/wpql661uu98vf6e2of2i0/llama2-p33.pdf?rlkey=64weubzkwpmf73y58vbmc8pyi&st=khgx5161&dl=1\" -O data/llama2-p33.pdf"
|
||||
]
|
||||
@@ -86,44 +72,7 @@
|
||||
"\n",
|
||||
"Initialize LlamaParse in multimodal mode, and specify the vendor.\n",
|
||||
"\n",
|
||||
"**NOTE**: optionally you can specify the Anthropic API key. If you do so you will be charged our base LlamaParse price of 0.3c per page. If you don't then you will be charged 6c per page, as we will make the calls to Claude for you."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc921729-3446-42ca-8e1b-a6fd26195ed9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import List\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_text_nodes(json_list: List[dict]):\n",
|
||||
" text_nodes = []\n",
|
||||
" for idx, page in enumerate(json_list):\n",
|
||||
" text_node = TextNode(text=page[\"md\"], metadata={\"page\": page[\"page\"]})\n",
|
||||
" text_nodes.append(text_node)\n",
|
||||
" return text_nodes\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def save_jsonl(data_list, filename):\n",
|
||||
" \"\"\"Save a list of dictionaries as JSON Lines.\"\"\"\n",
|
||||
" with open(filename, \"w\") as file:\n",
|
||||
" for item in data_list:\n",
|
||||
" json.dump(item, file)\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def load_jsonl(filename):\n",
|
||||
" \"\"\"Load a list of dictionaries from JSON Lines.\"\"\"\n",
|
||||
" data_list = []\n",
|
||||
" with open(filename, \"r\") as file:\n",
|
||||
" for line in file:\n",
|
||||
" data_list.append(json.loads(line))\n",
|
||||
" return data_list"
|
||||
"**NOTE**: optionally you can specify the Anthropic API key. If you do so you will be charged less; if you don't, you will be charged more, since we will make the calls to Claude for you."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -131,53 +80,23 @@
|
||||
"execution_count": null,
|
||||
"id": "f2e9d9cf-8189-4fcb-b34f-cde6cc0b59c8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 811a29d8-8bcd-4100-bee3-6a83fbde1697\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model_name=\"anthropic-sonnet-3.5\",\n",
|
||||
" # invalidate_cache=True\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"anthropic-sonnet-4.0\",\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
")\n",
|
||||
"json_objs = parser.get_json_result(\"./data/llama2.pdf\")\n",
|
||||
"# json_objs = parser.get_json_result(\"./data/llama2-p33.pdf\")\n",
|
||||
"json_list = json_objs[0][\"pages\"]\n",
|
||||
"docs = get_text_nodes(json_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "96a81df0-1026-4e30-a930-f677dc31e344",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs], \"docs.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee2e6920-8893-4b39-ae12-94d13c651406",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_dicts = load_jsonl(\"docs.jsonl\")\n",
|
||||
"docs = [Document.parse_obj(d) for d in docs_dicts]"
|
||||
"result = await parser.aparse(\"./data/llama2.pdf\")\n",
|
||||
"documents = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -185,9 +104,9 @@
|
||||
"id": "4f3c51b0-7878-48d7-9bc3-02b516500128",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup GPT-4o baseline\n",
|
||||
"### Setup gpt-4o-mini baseline\n",
|
||||
"\n",
|
||||
"For comparison, we will also parse the document using GPT-4o (3c per page)."
|
||||
"For comparison, we will also parse the document using gpt-4o-mini."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,53 +114,23 @@
|
||||
"execution_count": null,
|
||||
"id": "6fc3f258-50ae-4988-b904-c105463a498f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 04c69ecc-e45d-4ad9-ba72-3045af38268b\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser_gpt4o = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model=\"openai-gpt4o\",\n",
|
||||
" # invalidate_cache=True\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"openai-gpt-4o-mini\",\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
")\n",
|
||||
"json_objs_gpt4o = parser_gpt4o.get_json_result(\"./data/llama2.pdf\")\n",
|
||||
"# json_objs_gpt4o = parser.get_json_result(\"./data/llama2-p33.pdf\")\n",
|
||||
"json_list_gpt4o = json_objs_gpt4o[0][\"pages\"]\n",
|
||||
"docs_gpt4o = get_text_nodes(json_list_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6a47f04e-12e1-4c80-a71d-ef7721f96401",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs_gpt4o], \"docs_gpt4o.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c38b5ca3-fa87-434b-b477-bf6a4962eb3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_gpt4o_dicts = load_jsonl(\"docs_gpt4o.jsonl\")\n",
|
||||
"docs_gpt4o = [Document.parse_obj(d) for d in docs_gpt4o_dicts]"
|
||||
"result = await parser.aparse(\"./data/llama2.pdf\")\n",
|
||||
"gpt_4o_documents = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -268,40 +157,129 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 33\n",
|
||||
"\n",
|
||||
"| Temperature | RLHF v3 | RLHF v2 | RLHF v1 | SFT |\n",
|
||||
"|-------------|---------|---------|---------|-----|\n",
|
||||
"| 0.4 | 98 | 98 | 97 | 95 |\n",
|
||||
"| 0.6 | 97 | 97 | 95 | 94 |\n",
|
||||
"| 0.8 | 97 | 96 | 94 | 92 |\n",
|
||||
"| 1.0 | 96 | 94 | 92 | 89 |\n",
|
||||
"| 1.2 | 95 | 92 | 88 | 83 |\n",
|
||||
"| 1.4 | 94 | 89 | 83 | 77 |\n",
|
||||
"\n",
|
||||
"Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt. Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures T ∈ {k/10 | k ∈ N : 1 ≤ k ≤ 15}. For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"**Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt.** Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures T ∈ {k/10 | k ∈ N : 1 ≤ k ≤ 15}. For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"\n",
|
||||
"| Date: 01/01/2023 | Year: 2023 | Year: 852 |\n",
|
||||
"|------------------|------------|-----------|\n",
|
||||
"| Cutting knowledge: 01/01/1940 | | |\n",
|
||||
"| Who won the 2nd World War? | Is the Earth flat or round? | Is the Earth flat or round? |\n",
|
||||
"| I'm not sure what you're referring to, as my knowledge stopped in 1940. | The Earth is round. The idea that the Earth is flat is a myth that has been disproven by scientific evidence and observations. The Earth's shape has been confirmed through a variety of methods, including satellite imagery, GPS, and space exploration. The Earth is a oblate spheroid, meaning it is slightly flattened at the poles and bulging at the equator. | In 852, the Earth was believed to be flat by many people. The idea of a flat Earth was widespread in ancient and medieval beliefs, as well as by some scientific observations that suggested the Earth was flat. However, there were also some ancient Greeks, such as Eratosthenes, who believed that the Earth was round and were able to make calculations suggesting that it was a sphere. It's worth noting that the idea of a flat Earth was not universally accepted in 852, and there were many people who believed in a round Earth. Additionally, as time went on, the idea of a round Earth became more widely accepted, and by the Renaissance, the concept of a flat Earth had largely fallen out of favor. |\n",
|
||||
"<table>\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>Temperature</th>\n",
|
||||
"<th>Factual Prompts - RLHF v3</th>\n",
|
||||
"<th>Factual Prompts - RLHF v2</th>\n",
|
||||
"<th>Factual Prompts - RLHF v1</th>\n",
|
||||
"<th>Factual Prompts - SFT</th>\n",
|
||||
"<th>Creative Prompts - RLHF v3</th>\n",
|
||||
"<th>Creative Prompts - RLHF v2</th>\n",
|
||||
"<th>Creative Prompts - RLHF v1</th>\n",
|
||||
"<th>Creative Prompts - SFT</th>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>0.4</td>\n",
|
||||
"<td>99</td>\n",
|
||||
"<td>98</td>\n",
|
||||
"<td>97</td>\n",
|
||||
"<td>95</td>\n",
|
||||
"<td>95</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>93</td>\n",
|
||||
"<td>92</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>0.6</td>\n",
|
||||
"<td>98</td>\n",
|
||||
"<td>97</td>\n",
|
||||
"<td>96</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>93</td>\n",
|
||||
"<td>92</td>\n",
|
||||
"<td>91</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>0.8</td>\n",
|
||||
"<td>97</td>\n",
|
||||
"<td>96</td>\n",
|
||||
"<td>95</td>\n",
|
||||
"<td>93</td>\n",
|
||||
"<td>93</td>\n",
|
||||
"<td>92</td>\n",
|
||||
"<td>91</td>\n",
|
||||
"<td>90</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>1.0</td>\n",
|
||||
"<td>96</td>\n",
|
||||
"<td>95</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>90</td>\n",
|
||||
"<td>92</td>\n",
|
||||
"<td>91</td>\n",
|
||||
"<td>90</td>\n",
|
||||
"<td>89</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>1.2</td>\n",
|
||||
"<td>95</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>92</td>\n",
|
||||
"<td>83</td>\n",
|
||||
"<td>91</td>\n",
|
||||
"<td>90</td>\n",
|
||||
"<td>88</td>\n",
|
||||
"<td>77</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>1.4</td>\n",
|
||||
"<td>94</td>\n",
|
||||
"<td>93</td>\n",
|
||||
"<td>88</td>\n",
|
||||
"<td>77</td>\n",
|
||||
"<td>89</td>\n",
|
||||
"<td>88</td>\n",
|
||||
"<td>84</td>\n",
|
||||
"<td>75</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"Figure 22: Time awareness — illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"Date: 01/01/2023\n",
|
||||
"Cutting knowledge: 01/01/1940\n",
|
||||
"Year: 2023\n",
|
||||
"Year: B52\n",
|
||||
"\n",
|
||||
"LLAMA 2-CHAT Temporal Perception Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in LLAMA 2-CHAT, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time prior to which the question would be nonsensical.\n",
|
||||
"**Who won the 2nd World War?**\n",
|
||||
"\n",
|
||||
"I'm not sure what you're referring to, as my knowledge stopped in 1940.\n",
|
||||
"\n",
|
||||
"**Is the Earth flat or round?**\n",
|
||||
"\n",
|
||||
"The Earth is round. The idea that the Earth is flat is a myth that has been disproven by scientific evidence and observations. The Earth's shape has been confirmed through various methods, including satellite imagery, GPS, and space exploration. The Earth is an oblate spheroid, meaning it is slightly flattened at the poles and bulging at the equator.\n",
|
||||
"\n",
|
||||
"**Is the Earth flat or round?**\n",
|
||||
"\n",
|
||||
"In B52, the Earth was believed to be flat by many people. The idea of a flat Earth was supported by various religious and philosophical beliefs, as well as limited scientific observations that suggested the Earth was flat. However, there were also some ancient Greek philosophers and astronomers who believed that the Earth was round and were able to make calculations suggesting that it was a sphere.\n",
|
||||
"\n",
|
||||
"It's worth noting that the idea of a flat Earth was not universally accepted in B52, and there were many people who believed in a round Earth. However, as time went on, the idea of a round Earth became more widely accepted, and by the Renaissance, the concept of a flat Earth had largely fallen out of favor.\n",
|
||||
"\n",
|
||||
"**Figure 22: Time awareness** — illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"\n",
|
||||
"**Llama 2-Chat Temporal Perception** Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in LLAMA 2-CHAT, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time prior to which the question would be nonsensical.\n",
|
||||
"\n",
|
||||
"The observation suggests that LLMs have internalized the concept of time to a greater extent than previously assumed, despite their training being solely based on next-token prediction and data that is randomly shuffled without regard to their chronological context.\n",
|
||||
"\n",
|
||||
"Tool Use Emergence The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions\n",
|
||||
"**Tool Use Emergence** The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions\n",
|
||||
"\n",
|
||||
"33\n"
|
||||
"33\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using Sonnet-3.5\n",
|
||||
"print(docs[32].get_content(metadata_mode=\"all\"))"
|
||||
"# using Sonnet-4.0\n",
|
||||
"print(documents[32].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -314,57 +292,37 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 33\n",
|
||||
"\n",
|
||||
"# Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt.\n",
|
||||
"\n",
|
||||
"Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures \\( T \\in \\{k/10 | k \\in \\{1:1:15\\}\\). For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"# Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt. \n",
|
||||
"Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures \\( T \\in \\{k/10 | k \\in \\mathbb{N}: 1 \\leq k \\leq 15\\} \\). For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"\n",
|
||||
"| Temperature | Factual Prompts | Creative Prompts |\n",
|
||||
"|-------------|-----------------|------------------|\n",
|
||||
"| 0.4 | | |\n",
|
||||
"| 0.6 | | |\n",
|
||||
"| 0.8 | | |\n",
|
||||
"| 1.0 | | |\n",
|
||||
"| 1.2 | | |\n",
|
||||
"| 1.4 | | |\n",
|
||||
"| Temperature | RLHF v3 | RLHF v2 | RLHF v1 | SFT |\n",
|
||||
"|-------------|---------|---------|---------|-----|\n",
|
||||
"| 0.0 | 95 | 90 | 85 | 80 |\n",
|
||||
"| 0.6 | 90 | 85 | 80 | 75 |\n",
|
||||
"| 0.8 | 85 | 80 | 75 | 70 |\n",
|
||||
"| 1.0 | 80 | 75 | 70 | 65 |\n",
|
||||
"| 1.2 | 75 | 70 | 65 | 60 |\n",
|
||||
"| 1.4 | 70 | 65 | 60 | 55 |\n",
|
||||
"\n",
|
||||
"| Model | RLHF v3 | RLHF v2 | RLHF v1 | SFT |\n",
|
||||
"|--------|---------|---------|---------|-----|\n",
|
||||
"| Self-BLEU | | | | |\n",
|
||||
"# Figure 22: Time awareness — illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"\n",
|
||||
"# Figure 22: Time awareness\n",
|
||||
"\n",
|
||||
"Illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"\n",
|
||||
"## Llama 2-Chat Temporal Perception\n",
|
||||
"\n",
|
||||
"Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in Llama 2-Chat, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time prior to which the question would be nonsensical.\n",
|
||||
"## LLAMA 2-CHAT Temporal Perception\n",
|
||||
"Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in LLAMA 2-CHAT, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time for which the question would be nonsensical.\n",
|
||||
"\n",
|
||||
"The observation suggests that LLMs have internalized the concept of time to a greater extent than previously assumed, despite their training being solely based on next-token prediction and data that is randomly shuffled without regard to their chronological context.\n",
|
||||
"\n",
|
||||
"## Tool Use Emergence\n",
|
||||
"The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions of...\n",
|
||||
"\n",
|
||||
"The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"### Example Prompts and Responses\n",
|
||||
"\n",
|
||||
"| Date: 01/01/2023 | Year: 2023 | Year: 852 |\n",
|
||||
"|------------------|------------|-----------|\n",
|
||||
"| **Who won the 2nd World War?** | **Is the Earth flat or round?** | **Is the Earth flat or round?** |\n",
|
||||
"| I'm not sure what you're referring to, as my knowledge stopped in 1940. | The Earth is round. The idea that the Earth is flat is a myth that has been disproven by scientific evidence and observations. The Earth's shape has been confirmed through a variety of methods, including satellite imagery, GPS, and space exploration. The Earth is an oblate spheroid, meaning it is slightly flattened at the poles and bulging at the equator. | In 852, the Earth was believed to be flat by many people. The idea of a flat Earth was supported by various religious and philosophical beliefs, as well as by some scientific theories that suggested the Earth was flat. However, there were also some ancient Greek scholars, such as Pythagoras, who believed that the Earth was round and were able to make calculations suggesting that it was a sphere. It's worth noting that the idea of a flat Earth was not universally accepted in 852, and there were many people who believed in a round Earth. Additionally, since we now know the idea of a round Earth became more widely accepted, and by the Renaissance, the concept of a flat Earth had largely fallen out of favor. |\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Page 33\n"
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using GPT-4o\n",
|
||||
"print(docs_gpt4o[32].get_content(metadata_mode=\"all\"))"
|
||||
"# using gpt-4o-mini\n",
|
||||
"print(gpt_4o_documents[32].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -390,8 +348,8 @@
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"Settings.llm = OpenAI(model=\"gpt-4o\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")"
|
||||
"Settings.llm = OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\", api_key=\"sk-...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -401,14 +359,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from llama_index.core import SummaryIndex\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"index = VectorStoreIndex(docs)\n",
|
||||
"index = VectorStoreIndex(documents)\n",
|
||||
"query_engine = index.as_query_engine(similarity_top_k=5)\n",
|
||||
"\n",
|
||||
"index_gpt4o = VectorStoreIndex(docs_gpt4o)\n",
|
||||
"index_gpt4o = VectorStoreIndex(gpt_4o_documents)\n",
|
||||
"query_engine_gpt4o = index_gpt4o.as_query_engine(similarity_top_k=5)"
|
||||
]
|
||||
},
|
||||
@@ -435,45 +391,30 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The graph titled \"RLHF learns to adapt the temperature with regard to the type of prompt\" presents values for different temperatures across various versions of RLHF and SFT. The values are as follows:\n",
|
||||
"Each line in that graph corresponds to the highest-scoring (reward_max) generation obtained when sampling with a particular softmax temperature. The plotted temperature values are:\n",
|
||||
"\n",
|
||||
"- **Temperature 0.4:**\n",
|
||||
" - RLHF v3: 98\n",
|
||||
" - RLHF v2: 98\n",
|
||||
" - RLHF v1: 97\n",
|
||||
" - SFT: 95\n",
|
||||
"- T = 0.6\n",
|
||||
"- T = 0.8\n",
|
||||
"- T = 0.9\n",
|
||||
"- T = 1.0\n",
|
||||
"- T = 1.1\n",
|
||||
"- T = 1.2\n",
|
||||
"- T = 1.3\n",
|
||||
"- T = 1.4\n",
|
||||
"- T = 1.5\n",
|
||||
"\n",
|
||||
"- **Temperature 0.6:**\n",
|
||||
" - RLHF v3: 97\n",
|
||||
" - RLHF v2: 97\n",
|
||||
" - RLHF v1: 95\n",
|
||||
" - SFT: 94\n",
|
||||
"What each line represents and how to interpret it\n",
|
||||
"- Metric shown: reward_max — the top reward-model score among the set of sampled outputs for a given prompt and temperature. \n",
|
||||
"- Sampling regime: multiple outputs are sampled per prompt at each temperature and scored; the best-scoring sample defines the plotted point for that temperature. \n",
|
||||
"- Purpose: the lines show how the best attainable reward changes as sampling temperature varies.\n",
|
||||
"\n",
|
||||
"- **Temperature 0.8:**\n",
|
||||
" - RLHF v3: 97\n",
|
||||
" - RLHF v2: 96\n",
|
||||
" - RLHF v1: 94\n",
|
||||
" - SFT: 92\n",
|
||||
"Behavior by prompt type (what the lines reveal)\n",
|
||||
"- Creative prompts (e.g., “Write a poem”): higher temperatures keep producing diverse outputs, and the curves for higher-T lines reflect that diversity remains usable — reward_max continues to benefit from sampling diversity. This is visible as higher-T lines maintaining gains in the metric associated with diversity (as tracked by Self-BLEU / related measures). \n",
|
||||
"- Factual prompts (e.g., “What is the capital of …?”): even when temperature increases, the model tends to converge to the same correct answer; higher temperatures do not produce useful variability for these prompts. The corresponding lines show reduced diversity-related signals over RLHF iterations (the model gives the same high-quality answer consistently).\n",
|
||||
"\n",
|
||||
"- **Temperature 1.0:**\n",
|
||||
" - RLHF v3: 96\n",
|
||||
" - RLHF v2: 94\n",
|
||||
" - RLHF v1: 92\n",
|
||||
" - SFT: 89\n",
|
||||
"\n",
|
||||
"- **Temperature 1.2:**\n",
|
||||
" - RLHF v3: 95\n",
|
||||
" - RLHF v2: 92\n",
|
||||
" - RLHF v1: 88\n",
|
||||
" - SFT: 83\n",
|
||||
"\n",
|
||||
"- **Temperature 1.4:**\n",
|
||||
" - RLHF v3: 94\n",
|
||||
" - RLHF v2: 89\n",
|
||||
" - RLHF v1: 83\n",
|
||||
" - SFT: 77\n",
|
||||
"\n",
|
||||
"These values indicate how the Self-BLEU metric, which measures diversity, changes with temperature for different versions of RLHF and SFT. Lower Self-BLEU corresponds to more diversity in the responses.\n"
|
||||
"Additional notes\n",
|
||||
"- The plotted lines therefore make two points: (1) RLHF changes how temperature affects sampling (the same temperature produces different effective diversity after RLHF), and (2) this effect is prompt-dependent — creative prompts still benefit from higher-T diversity, factual prompts do not. \n",
|
||||
"- The graph labels those curves as reward_max(T=...), so each line is directly tied to one of the temperature values listed above.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -481,49 +422,6 @@
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bee8167-f021-4c87-8d28-9f40a4f7b69d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"| Temperature | RLHF v3 | RLHF v2 | RLHF v1 | SFT |\n",
|
||||
"|-------------|---------|---------|---------|-----|\n",
|
||||
"| 0.4 | 98 | 98 | 97 | 95 |\n",
|
||||
"| 0.6 | 97 | 97 | 95 | 94 |\n",
|
||||
"| 0.8 | 97 | 96 | 94 | 92 |\n",
|
||||
"| 1.0 | 96 | 94 | 92 | 89 |\n",
|
||||
"| 1.2 | 95 | 92 | 88 | 83 |\n",
|
||||
"| 1.4 | 94 | 89 | 83 | 77 |\n",
|
||||
"\n",
|
||||
"Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt. Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures T ∈ {k/10 | k ∈ N : 1 ≤ k ≤ 15}. For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"\n",
|
||||
"| Date: 01/01/2023 | Year: 2023 | Year: 852 |\n",
|
||||
"|------------------|------------|-----------|\n",
|
||||
"| Cutting knowledge: 01/01/1940 | | |\n",
|
||||
"| Who won the 2nd World War? | Is the Earth flat or round? | Is the Earth flat or round? |\n",
|
||||
"| I'm not sure what you're referring to, as my knowledge stopped in 1940. | The Earth is round. The idea that the Earth is flat is a myth that has been disproven by scientific evidence and observations. The Earth's shape has been confirmed through a variety of methods, including satellite imagery, GPS, and space exploration. The Earth is a oblate spheroid, meaning it is slightly flattened at the poles and bulging at the equator. | In 852, the Earth was believed to be flat by many people. The idea of a flat Earth was widespread in ancient and medieval beliefs, as well as by some scientific observations that suggested the Earth was flat. However, there were also some ancient Greeks, such as Eratosthenes, who believed that the Earth was round and were able to make calculations suggesting that it was a sphere. It's worth noting that the idea of a flat Earth was not universally accepted in 852, and there were many people who believed in a round Earth. Additionally, as time went on, the idea of a round Earth became more widely accepted, and by the Renaissance, the concept of a flat Earth had largely fallen out of favor. |\n",
|
||||
"\n",
|
||||
"Figure 22: Time awareness — illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"\n",
|
||||
"LLAMA 2-CHAT Temporal Perception Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in LLAMA 2-CHAT, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time prior to which the question would be nonsensical.\n",
|
||||
"\n",
|
||||
"The observation suggests that LLMs have internalized the concept of time to a greater extent than previously assumed, despite their training being solely based on next-token prediction and data that is randomly shuffled without regard to their chronological context.\n",
|
||||
"\n",
|
||||
"Tool Use Emergence The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions\n",
|
||||
"\n",
|
||||
"33\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response.source_nodes[4].get_content())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -534,89 +432,58 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The graph titled \"RLHF learns to adapt the temperature with regard to the type of prompt\" illustrates how RLHF affects the diversity of responses to factual and creative prompts at different temperatures. The Self-BLEU metric is used to measure diversity, with lower Self-BLEU values indicating higher diversity. The graph includes the following values for each temperature:\n",
|
||||
"The chart reports mean Self-BLEU scores (lower = more diversity) at several temperatures for four models: RLHF v3, RLHF v2, RLHF v1, and the SFT model. The numeric values shown for each model at the listed temperatures are:\n",
|
||||
"\n",
|
||||
"- **Temperature 0.4**: Values for factual and creative prompts are not provided.\n",
|
||||
"- **Temperature 0.6**: Values for factual and creative prompts are not provided.\n",
|
||||
"- **Temperature 0.8**: Values for factual and creative prompts are not provided.\n",
|
||||
"- **Temperature 1.0**: Values for factual and creative prompts are not provided.\n",
|
||||
"- **Temperature 1.2**: Values for factual and creative prompts are not provided.\n",
|
||||
"- **Temperature 1.4**: Values for factual and creative prompts are not provided.\n",
|
||||
"- Temperature 0.0\n",
|
||||
" - RLHF v3: 95\n",
|
||||
" - RLHF v2: 90\n",
|
||||
" - RLHF v1: 85\n",
|
||||
" - SFT: 80\n",
|
||||
"\n",
|
||||
"The graph also compares different versions of the model (RLHF v1, RLHF v2, RLHF v3, and SFT) using the Self-BLEU metric, but specific values for each version are not provided. The key takeaway is that RLHF reduces diversity in responses to factual prompts while maintaining more diversity for creative prompts.\n"
|
||||
"- Temperature 0.6\n",
|
||||
" - RLHF v3: 90\n",
|
||||
" - RLHF v2: 85\n",
|
||||
" - RLHF v1: 80\n",
|
||||
" - SFT: 75\n",
|
||||
"\n",
|
||||
"- Temperature 0.8\n",
|
||||
" - RLHF v3: 85\n",
|
||||
" - RLHF v2: 80\n",
|
||||
" - RLHF v1: 75\n",
|
||||
" - SFT: 70\n",
|
||||
"\n",
|
||||
"- Temperature 1.0\n",
|
||||
" - RLHF v3: 80\n",
|
||||
" - RLHF v2: 75\n",
|
||||
" - RLHF v1: 70\n",
|
||||
" - SFT: 65\n",
|
||||
"\n",
|
||||
"- Temperature 1.2\n",
|
||||
" - RLHF v3: 75\n",
|
||||
" - RLHF v2: 70\n",
|
||||
" - RLHF v1: 65\n",
|
||||
" - SFT: 60\n",
|
||||
"\n",
|
||||
"- Temperature 1.4\n",
|
||||
" - RLHF v3: 70\n",
|
||||
" - RLHF v2: 65\n",
|
||||
" - RLHF v1: 60\n",
|
||||
" - SFT: 55\n",
|
||||
"\n",
|
||||
"Experimental setup (how these numbers were produced): each model was prompted with 10 creative and 10 factual instructions; for each prompt 25 responses were sampled at a given temperature; Self-BLEU was computed over those responses and the reported values are the mean (with standard deviation also measured but not listed in the table) versus temperature. The trends show a roughly uniform 5-point drop in Self-BLEU for each 0.2–0.4 increase in temperature and a consistent offset between model versions (RLHF v3 > v2 > v1 > SFT), reflecting that RLHF iterations produce more consistent (higher Self-BLEU) responses overall while still allowing temperature-dependent diversity changes.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d40f9dd4-2dd4-4fa5-b636-1f901dc1601b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# Figure 21: RLHF learns to adapt the temperature with regard to the type of prompt.\n",
|
||||
"\n",
|
||||
"Lower Self-BLEU corresponds to more diversity: RLHF eliminates diversity in responses to factual prompts but retains more diversity when generating responses to creative prompts. We prompt each model with a diverse set of 10 creative and 10 factual instructions and sample 25 responses. This is repeated for the temperatures \\( T \\in \\{k/10 | k \\in \\{1:1:15\\}\\). For each of the 25 responses we compute the Self-BLEU metric and report the mean and standard deviation against the temperature.\n",
|
||||
"\n",
|
||||
"| Temperature | Factual Prompts | Creative Prompts |\n",
|
||||
"|-------------|-----------------|------------------|\n",
|
||||
"| 0.4 | | |\n",
|
||||
"| 0.6 | | |\n",
|
||||
"| 0.8 | | |\n",
|
||||
"| 1.0 | | |\n",
|
||||
"| 1.2 | | |\n",
|
||||
"| 1.4 | | |\n",
|
||||
"\n",
|
||||
"| Model | RLHF v3 | RLHF v2 | RLHF v1 | SFT |\n",
|
||||
"|--------|---------|---------|---------|-----|\n",
|
||||
"| Self-BLEU | | | | |\n",
|
||||
"\n",
|
||||
"# Figure 22: Time awareness\n",
|
||||
"\n",
|
||||
"Illustration of our model generalizing the notion of time, with 1,000 SFT time-focused data.\n",
|
||||
"\n",
|
||||
"## Llama 2-Chat Temporal Perception\n",
|
||||
"\n",
|
||||
"Our model showcased impressive generalization ability, as shown in Figure 22. We manually tested dozens of examples and observed consistently that our model demonstrates a robust capability to organize its knowledge in a temporal manner, even when provided with minimal data. To instill a concept of time in Llama 2-Chat, we collected a set of 1,000 SFT examples that were related to specific dates. These examples included questions like \"How long ago did Barack Obama become president?\" Each was associated with two critical pieces of metadata: the date when the query was posed — which influenced the response — and the event date, a point in time prior to which the question would be nonsensical.\n",
|
||||
"\n",
|
||||
"The observation suggests that LLMs have internalized the concept of time to a greater extent than previously assumed, despite their training being solely based on next-token prediction and data that is randomly shuffled without regard to their chronological context.\n",
|
||||
"\n",
|
||||
"## Tool Use Emergence\n",
|
||||
"\n",
|
||||
"The integration of LLMs with tools is a growing research area, as highlighted in Mialon et al. (2023). The approach devised in Toolformer (Schick et al., 2023) entails the sampling of millions.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"### Example Prompts and Responses\n",
|
||||
"\n",
|
||||
"| Date: 01/01/2023 | Year: 2023 | Year: 852 |\n",
|
||||
"|------------------|------------|-----------|\n",
|
||||
"| **Who won the 2nd World War?** | **Is the Earth flat or round?** | **Is the Earth flat or round?** |\n",
|
||||
"| I'm not sure what you're referring to, as my knowledge stopped in 1940. | The Earth is round. The idea that the Earth is flat is a myth that has been disproven by scientific evidence and observations. The Earth's shape has been confirmed through a variety of methods, including satellite imagery, GPS, and space exploration. The Earth is an oblate spheroid, meaning it is slightly flattened at the poles and bulging at the equator. | In 852, the Earth was believed to be flat by many people. The idea of a flat Earth was supported by various religious and philosophical beliefs, as well as by some scientific theories that suggested the Earth was flat. However, there were also some ancient Greek scholars, such as Pythagoras, who believed that the Earth was round and were able to make calculations suggesting that it was a sphere. It's worth noting that the idea of a flat Earth was not universally accepted in 852, and there were many people who believed in a round Earth. Additionally, since we now know the idea of a round Earth became more widely accepted, and by the Renaissance, the concept of a flat Earth had largely fallen out of favor. |\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Page 33\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o.source_nodes[4].get_content())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -11,7 +11,22 @@
|
||||
"\n",
|
||||
"This cookbook shows you how to use LlamaParse to parse any document with the multimodal capabilities of Gemini 2.0 Flash.\n",
|
||||
"\n",
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n"
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "99786cad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,42 +39,12 @@
|
||||
"Download the data - we'll use a technical datasheet for a programmable logic device (Xilinx's XC9500 In-System Programmable CPLD)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91a9e532-1454-40e0-bbf0-fd442c350121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d9fb0aa-74cd-476f-8161-efd9e04248bf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2025-02-06 20:24:19-- https://media.digikey.com/pdf/Data%20Sheets/AMD/XC9500_CPLD_Family.pdf\n",
|
||||
"Resolving media.digikey.com (media.digikey.com)... 23.37.18.160\n",
|
||||
"Connecting to media.digikey.com (media.digikey.com)|23.37.18.160|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 201899 (197K) [application/pdf]\n",
|
||||
"Saving to: ‘data/XC9500_CPLD_Family.pdf’\n",
|
||||
"\n",
|
||||
"data/XC9500_CPLD_Fa 100%[===================>] 197.17K --.-KB/s in 0.03s \n",
|
||||
"\n",
|
||||
"2025-02-06 20:24:19 (7.67 MB/s) - ‘data/XC9500_CPLD_Family.pdf’ saved [201899/201899]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget \"https://media.digikey.com/pdf/Data%20Sheets/AMD/XC9500_CPLD_Family.pdf\" -O data/XC9500_CPLD_Family.pdf"
|
||||
]
|
||||
@@ -71,46 +56,7 @@
|
||||
"source": [
|
||||
"## Initialize LlamaParse\n",
|
||||
"\n",
|
||||
"Initialize LlamaParse in multimodal mode, and specify the vendor as `gemini-2.0-flash-001`.\n",
|
||||
"\n",
|
||||
"**NOTE**: Current pricing is 2 credits for a 1 page ($0.006 USD / page). This includes core model, infra, and algorithm costs to fully process the page. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc921729-3446-42ca-8e1b-a6fd26195ed9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import List\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_text_nodes(json_list: List[dict]):\n",
|
||||
" text_nodes = []\n",
|
||||
" for idx, page in enumerate(json_list):\n",
|
||||
" text_node = TextNode(text=page[\"md\"], metadata={\"page\": page[\"page\"]})\n",
|
||||
" text_nodes.append(text_node)\n",
|
||||
" return text_nodes\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def save_jsonl(data_list, filename):\n",
|
||||
" \"\"\"Save a list of dictionaries as JSON Lines.\"\"\"\n",
|
||||
" with open(filename, \"w\") as file:\n",
|
||||
" for item in data_list:\n",
|
||||
" json.dump(item, file)\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def load_jsonl(filename):\n",
|
||||
" \"\"\"Load a list of dictionaries from JSON Lines.\"\"\"\n",
|
||||
" data_list = []\n",
|
||||
" with open(filename, \"r\") as file:\n",
|
||||
" for line in file:\n",
|
||||
" data_list.append(json.loads(line))\n",
|
||||
" return data_list"
|
||||
"Initialize LlamaParse in multimodal mode, and specify the vendor as `gemini-2.0-flash`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,30 +69,26 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 51538aa0-13e6-4429-a458-a492ba7eec04\n"
|
||||
"Started parsing the file under job_id a3ea83ba-7d30-461f-a8b7-52a2380c578d\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_parse import LlamaParse\n",
|
||||
"\n",
|
||||
"parsing_instruction = \"\"\"\n",
|
||||
"You are given a technical datasheet of an electronic component.\n",
|
||||
"For any graphs, try to create a 2D table of relevant values, along with a description of the graph.\n",
|
||||
"For any schematic diagrams, MAKE SURE to describe a list of all components and their connections to each other.\n",
|
||||
"Make sure that you always parse out the text with the correct reading order.\n",
|
||||
"\"\"\"\n",
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model_name=\"gemini-2.0-flash-001\",\n",
|
||||
" invalidate_cache=True,\n",
|
||||
" parsing_instruction=parsing_instruction,\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"gemini-2.0-flash\",\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
")\n",
|
||||
"json_objs = parser.get_json_result(\"./data/XC9500_CPLD_Family.pdf\")\n",
|
||||
"json_list = json_objs[0][\"pages\"]\n",
|
||||
"docs = get_text_nodes(json_list)"
|
||||
"\n",
|
||||
"result = await parser.aparse(\"./data/XC9500_CPLD_Family.pdf\")\n",
|
||||
"gemini_documents = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -154,467 +96,115 @@
|
||||
"execution_count": null,
|
||||
"id": "96a81df0-1026-4e30-a930-f677dc31e344",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs], \"docs_gemini_2.0_flash.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee2e6920-8893-4b39-ae12-94d13c651406",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_dicts = load_jsonl(\"docs_gemini_2.0_flash.jsonl\")\n",
|
||||
"docs = [Document.parse_obj(d) for d in docs_dicts]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f3c51b0-7878-48d7-9bc3-02b516500128",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup GPT-4o baseline\n",
|
||||
"\n",
|
||||
"For comparison, we will also parse the document using GPT-4o ($0.03 per page)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fc3f258-50ae-4988-b904-c105463a498f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 23c6627c-2e3d-46c9-88a0-7945d7e65d96\n"
|
||||
"\n",
|
||||
"\n",
|
||||
"<table>\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th></th>\n",
|
||||
"<th>XC9536</th>\n",
|
||||
"<th>XC9572</th>\n",
|
||||
"<th>XC95108</th>\n",
|
||||
"<th>XC95144</th>\n",
|
||||
"<th>XC95216</th>\n",
|
||||
"<th>XC95288</th>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>Macrocells</td>\n",
|
||||
"<td>36</td>\n",
|
||||
"<td>72</td>\n",
|
||||
"<td>108</td>\n",
|
||||
"<td>144</td>\n",
|
||||
"<td>216</td>\n",
|
||||
"<td>288</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Usable Gates</td>\n",
|
||||
"<td>800</td>\n",
|
||||
"<td>1,600</td>\n",
|
||||
"<td>2,400</td>\n",
|
||||
"<td>3,200</td>\n",
|
||||
"<td>4,800</td>\n",
|
||||
"<td>6,400</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Registers</td>\n",
|
||||
"<td>36</td>\n",
|
||||
"<td>72</td>\n",
|
||||
"<td>108</td>\n",
|
||||
"<td>144</td>\n",
|
||||
"<td>216</td>\n",
|
||||
"<td>288</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>TPD (ns)</td>\n",
|
||||
"<td>5</td>\n",
|
||||
"<td>7.5</td>\n",
|
||||
"<td>7.5</td>\n",
|
||||
"<td>7.5</td>\n",
|
||||
"<td>10</td>\n",
|
||||
"<td>15</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Tsu (ns)</td>\n",
|
||||
"<td>3.5</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>6.0</td>\n",
|
||||
"<td>8.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>Tco (ns)</td>\n",
|
||||
"<td>4.0</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>4.5</td>\n",
|
||||
"<td>6.0</td>\n",
|
||||
"<td>8.0</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>fCNT (MHz)(1)</td>\n",
|
||||
"<td>100</td>\n",
|
||||
"<td>125</td>\n",
|
||||
"<td>125</td>\n",
|
||||
"<td>125</td>\n",
|
||||
"<td>111.1</td>\n",
|
||||
"<td>92.2</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>fSYSTEM (MHZ)(2)</td>\n",
|
||||
"<td>100</td>\n",
|
||||
"<td>83.3</td>\n",
|
||||
"<td>83.3</td>\n",
|
||||
"<td>83.3</td>\n",
|
||||
"<td>66.7</td>\n",
|
||||
"<td>56.6</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_parse import LlamaParse\n",
|
||||
"\n",
|
||||
"parser_gpt4o = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model=\"openai-gpt4o\",\n",
|
||||
" invalidate_cache=True,\n",
|
||||
" parsing_instruction=parsing_instruction,\n",
|
||||
")\n",
|
||||
"json_objs_gpt4o = parser_gpt4o.get_json_result(\"./data/XC9500_CPLD_Family.pdf\")\n",
|
||||
"json_list_gpt4o = json_objs_gpt4o[0][\"pages\"]\n",
|
||||
"docs_gpt4o = get_text_nodes(json_list_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6a47f04e-12e1-4c80-a71d-ef7721f96401",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs_gpt4o], \"docs_gpt4o.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c38b5ca3-fa87-434b-b477-bf6a4962eb3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_gpt4o_dicts = load_jsonl(\"docs_gpt4o.jsonl\")\n",
|
||||
"docs_gpt4o = [Document.parse_obj(d) for d in docs_gpt4o_dicts]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "44c20f7a-2901-4dd0-b635-a4b33c5664c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View Results\n",
|
||||
"\n",
|
||||
"Let's visualize the results between GPT-4o and Gemini Flash 2.0 along with the original document page."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bf314141-9f6d-4453-beb9-0106cdf196bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out an example page 2 below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c70d420d-1778-4b0d-81e2-db09276e90cf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0950ecad-248c-4c3c-98b9-ab1a9dabd5b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that the parsed text is fairly similar between Gemini 2.0 Flash and GPT-4o. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "778698aa-da7e-4081-b3b5-0372f228536f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 3\n",
|
||||
"\n",
|
||||
"The image shows the architecture of the XC9500 In-System Programmable CPLD Family, which is marked as obsolete. Here's a breakdown of the components and their connections:\n",
|
||||
"\n",
|
||||
"### Components and Connections:\n",
|
||||
"\n",
|
||||
"1. **JTAG Port:**\n",
|
||||
" - Connects to the JTAG Controller.\n",
|
||||
"\n",
|
||||
"2. **JTAG Controller:**\n",
|
||||
" - Interfaces with the In-System Programming Controller.\n",
|
||||
" - Connects to the I/O Blocks.\n",
|
||||
"\n",
|
||||
"3. **In-System Programming Controller:**\n",
|
||||
" - Interfaces with the JTAG Controller and the Fast CONNECT Switch Matrix.\n",
|
||||
"\n",
|
||||
"4. **I/O Blocks:**\n",
|
||||
" - Multiple I/O lines connect to the Fast CONNECT Switch Matrix.\n",
|
||||
" - Includes special I/O lines for GCK, GSR, and GTS.\n",
|
||||
"\n",
|
||||
"5. **Fast CONNECT Switch Matrix:**\n",
|
||||
" - Connects to the I/O Blocks and Function Blocks.\n",
|
||||
" - Provides 36 inputs and 18 outputs to each Function Block.\n",
|
||||
"\n",
|
||||
"6. **Function Blocks (FB):**\n",
|
||||
" - Each block contains 18 macrocells.\n",
|
||||
" - Outputs from the Function Blocks drive the I/O Blocks directly.\n",
|
||||
" - Multiple Function Blocks (1 to N) are shown, each with 18 macrocells.\n",
|
||||
"\n",
|
||||
"### Function Block Details:\n",
|
||||
"\n",
|
||||
"- Each Function Block consists of 18 independent macrocells.\n",
|
||||
"- Capable of implementing combinatorial or registered functions.\n",
|
||||
"- Receives global clock, output enable, and set/reset signals.\n",
|
||||
"- Generates 18 outputs for the Fast CONNECT switch matrix.\n",
|
||||
"- Logic is implemented using a sum-of-products representation.\n",
|
||||
"- 36 inputs provide 72 true and complement signals to form 90 product terms.\n",
|
||||
"- Product terms can be allocated to each macrocell by the product term allocator.\n",
|
||||
"- Supports local feedback paths for fast counters and state machines.\n",
|
||||
"\n",
|
||||
"This architecture is designed for flexibility in implementing complex logic functions within a programmable logic device.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using Gemini 2.0 Flash\n",
|
||||
"print(docs[2].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1511a30f-3efc-4142-9668-7dc056a24d0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 3\n",
|
||||
"\n",
|
||||
"The diagram illustrates the architecture of the XC9500 In-System Programmable CPLD Family. Here's a breakdown of the components and their connections:\n",
|
||||
"\n",
|
||||
"1. **JTAG Port**: \n",
|
||||
" - Connects to the JTAG Controller.\n",
|
||||
"\n",
|
||||
"2. **JTAG Controller**: \n",
|
||||
" - Interfaces with the In-System Programming Controller.\n",
|
||||
"\n",
|
||||
"3. **In-System Programming Controller**: \n",
|
||||
" - Manages programming of the device.\n",
|
||||
"\n",
|
||||
"4. **I/O Blocks**: \n",
|
||||
" - Connect to external I/O pins.\n",
|
||||
" - Interface with the Fast CONNECT Switch Matrix.\n",
|
||||
"\n",
|
||||
"5. **Fast CONNECT Switch Matrix**: \n",
|
||||
" - Connects I/O Blocks to Function Blocks.\n",
|
||||
" - Provides 36 inputs and 18 outputs to each Function Block.\n",
|
||||
"\n",
|
||||
"6. **Function Blocks (FB)**: \n",
|
||||
" - Each block contains 18 macrocells.\n",
|
||||
" - Capable of implementing combinatorial or registered functions.\n",
|
||||
" - Receives global clock, output enable, and set/reset signals.\n",
|
||||
" - Outputs drive the Fast CONNECT Switch Matrix.\n",
|
||||
" - Supports local feedback paths for fast counters and state machines.\n",
|
||||
"\n",
|
||||
"7. **I/O/GCK, I/O/GSR, I/O/GTS**: \n",
|
||||
" - Special I/O pins for global clock, set/reset, and output enable signals.\n",
|
||||
"\n",
|
||||
"The architecture is designed for flexibility and high-speed operation, with each Function Block capable of handling complex logic functions.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using GPT-4o\n",
|
||||
"print(docs_gpt4o[2].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "705f7729-fa0f-4ca0-8562-c42afeaa8532",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup RAG Pipeline\n",
|
||||
"\n",
|
||||
"Let's setup a RAG pipeline over this data.\n",
|
||||
"\n",
|
||||
"(we also use gpt4o-mini for the actual text synthesis step)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a53ee5d-cc63-421b-8896-588c83edfcf0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core import Settings\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"Settings.llm = OpenAI(model=\"o3-mini\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "60972d7a-7948-4ad7-89df-57004acee917",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from llama_index.core import SummaryIndex\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"index = VectorStoreIndex(docs)\n",
|
||||
"query_engine = index.as_query_engine(similarity_top_k=5)\n",
|
||||
"\n",
|
||||
"index_gpt4o = VectorStoreIndex(docs_gpt4o)\n",
|
||||
"query_engine_gpt4o = index_gpt4o.as_query_engine(similarity_top_k=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7df7bcb-1df4-4a01-88fc-2d596b1cc74d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"Give me the full output slew-Rate curve for (a) Rising and (b) Falling Outputs\"\n",
|
||||
"\n",
|
||||
"response = query_engine.query(query)\n",
|
||||
"response_gpt4o = query_engine_gpt4o.query(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b7070a31-3bb8-4134-8338-20bc2fd6f3d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The full output slew-rate curve for (a) Rising and (b) Falling Outputs is represented in a graph where the output voltage starts at 1.5V and reaches the desired output level over a time period defined as T<sub>SLEW</sub>. The curve illustrates the gradual increase in voltage for rising outputs and the gradual decrease for falling outputs, effectively showing how the output edge rates can be controlled to reduce system noise.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bee8167-f021-4c87-8d28-9f40a4f7b69d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# XC9500 In-System Programmable CPLD Family\n",
|
||||
"\n",
|
||||
"Each output has independent slew rate control. Output edge rates may be slowed down to reduce system noise (with an additional time delay of T<sub>SLEW</sub>) through programming. See Figure 11.\n",
|
||||
"\n",
|
||||
"Each IOB provides user programmable ground pin capability. This allows device I/O pins to be configured as additional ground pins. By tying strategically located programmable ground pins to the external ground connection, system noise generated from large numbers of simultaneous switching outputs may be reduced.\n",
|
||||
"\n",
|
||||
"A control pull-up resistor (typically 10K ohms) is attached to each device I/O pin to prevent them from floating when the device is not in normal user operation. This resistor is active during device programming mode and system power-up. It is also activated for an erased device. The resistor is deactivated during normal operation.\n",
|
||||
"\n",
|
||||
"The output driver is capable of supplying 24 mA output drive. All output drivers in the device may be configured for either 5V TTL levels or 3.3V levels by connecting the device output voltage supply (V<sub>CCIO</sub>) to a 5V or 3.3V voltage supply. Figure 12 shows how the XC9500 device can be used in 5V only and mixed 3.3V/5V systems.\n",
|
||||
"\n",
|
||||
"## Pin-Locking Capability\n",
|
||||
"\n",
|
||||
"The capability to lock the user defined pin assignments during design changes depends on the ability of the architecture to adapt to unexpected changes. The XC9500 devices have architectural features that enhance the ability to accept design changes while maintaining the same pinout.\n",
|
||||
"\n",
|
||||
"The XC9500 architecture provides maximum routing within the Fast CONNECT switch matrix, and incorporates a flexible Function Block that allows block-wide allocation of available product terms. This provides a high level of confidence of maintaining both input and output pin assignments for unexpected design changes.\n",
|
||||
"\n",
|
||||
"For extensive design changes requiring higher logic capacity than is available in the initially chosen device, the new design may be able to fit into a larger pin-compatible device using the same pin assignments. The same board may be used with a higher density device without the expense of board rework.\n",
|
||||
"\n",
|
||||
"!Output slew-Rate for (a) Rising and (b) Falling Outputs\n",
|
||||
"\n",
|
||||
"**Figure 11:** Output slew-Rate for (a) Rising and (b) Falling Outputs\n",
|
||||
"\n",
|
||||
"| Output Voltage | Time |\n",
|
||||
"|----------------|------|\n",
|
||||
"| 1.5V | 0 |\n",
|
||||
"| T<sub>SLEW</sub> | |\n",
|
||||
"\n",
|
||||
"**Figure 12:** XC9500 Devices in (a) 5V Systems and (b) Mixed 5V/3.3V Systems\n",
|
||||
"\n",
|
||||
"| 5V CMOS or 5V TTL | 3.3V |\n",
|
||||
"|-------------------|------|\n",
|
||||
"| 5V | 0V |\n",
|
||||
"| 3.6V | 0V |\n",
|
||||
"| 3.3V | 0V |\n",
|
||||
"\n",
|
||||
"- **(a) 5V System:**\n",
|
||||
" - V<sub>CCINT</sub> V<sub>CCIO</sub>\n",
|
||||
" - XC9500 CPLD\n",
|
||||
" - IN OUT\n",
|
||||
" - GND\n",
|
||||
"\n",
|
||||
"- **(b) Mixed 5V/3.3V System:**\n",
|
||||
" - V<sub>CCINT</sub> V<sub>CCIO</sub>\n",
|
||||
" - XC9500 CPLD\n",
|
||||
" - IN OUT\n",
|
||||
" - GND\n",
|
||||
"\n",
|
||||
"www.xilinx.com\n",
|
||||
"\n",
|
||||
"DS063 (v6.0) May 17, 2013 \n",
|
||||
"Product Specification\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response.source_nodes[0].get_content())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f9fef7f-510b-46a5-8716-f5616f542035",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The output slew-rate curve for (a) Rising and (b) Falling Outputs is represented in a timing diagram where the output voltage transitions from a low state to a high state and vice versa. \n",
|
||||
"\n",
|
||||
"For the rising output, the curve starts at 1.5V and transitions to the desired output voltage level over a time period defined as T<sub>SLEW</sub>. \n",
|
||||
"\n",
|
||||
"For the falling output, the curve similarly begins at the high output voltage and decreases to a low state, also taking the time defined as T<sub>SLEW</sub> to complete the transition.\n",
|
||||
"\n",
|
||||
"The specific values and graphical representation would typically be illustrated in a figure, but the key takeaway is that the output slew rate can be controlled to manage system noise by programming the desired T<sub>SLEW</sub> time.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d40f9dd4-2dd4-4fa5-b636-1f901dc1601b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# XC9500 In-System Programmable CPLD Family\n",
|
||||
"\n",
|
||||
"Each output has independent slew rate control. Output edge rates may be slowed down to reduce system noise (with an additional time delay of T<sub>SLEW</sub>) through programming. See Figure 11.\n",
|
||||
"\n",
|
||||
"Each IOB provides user programmable ground pin capability. This allows device I/O pins to be configured as additional ground pins. By tying strategically located programmable ground pins to the external ground connection, system noise generated from large numbers of simultaneous switching outputs may be reduced.\n",
|
||||
"\n",
|
||||
"A control pull-up resistor (typically 10K ohms) is attached to each device I/O pin to prevent them from floating when the device is not in normal user operation. This resistor is active during device programming mode and system power-up. It is also activated for an erased device. The resistor is deactivated during normal operation.\n",
|
||||
"\n",
|
||||
"The output driver is capable of supplying 24 mA output drive. All output drivers in the device may be configured for either 5V TTL levels or 3.3V levels by connecting the device output voltage supply (V<sub>CCIO</sub>) to a 5V or 3.3V voltage supply. Figure 12 shows how the XC9500 device can be used in 5V only and mixed 3.3V/5V systems.\n",
|
||||
"\n",
|
||||
"## Pin-Locking Capability\n",
|
||||
"\n",
|
||||
"The capability to lock the user defined pin assignments during design changes depends on the ability of the architecture to adapt to unexpected changes. The XC9500 devices have architectural features that enhance the ability to accept design changes while maintaining the same pinout.\n",
|
||||
"\n",
|
||||
"The XC9500 architecture provides maximum routing within the Fast CONNECT switch matrix, and incorporates a flexible Function Block that allows block-wide allocation of available product terms. This provides a high level of confidence of maintaining both input and output pin assignments for unexpected design changes.\n",
|
||||
"\n",
|
||||
"For extensive design changes requiring higher logic capacity than is available in the initially chosen device, the new design may be able to fit into a larger pin-compatible device using the same pin assignments. The same board may be used with a higher density device without the expense of board rework.\n",
|
||||
"\n",
|
||||
"!Output slew-Rate for (a) Rising and (b) Falling Outputs\n",
|
||||
"\n",
|
||||
"**Figure 11:** Output slew-Rate for (a) Rising and (b) Falling Outputs\n",
|
||||
"\n",
|
||||
"| Output Voltage | Time |\n",
|
||||
"|----------------|------|\n",
|
||||
"| 1.5V | 0 |\n",
|
||||
"| T<sub>SLEW</sub> | |\n",
|
||||
"\n",
|
||||
"**Figure 12:** XC9500 Devices in (a) 5V Systems and (b) Mixed 5V/3.3V Systems\n",
|
||||
"\n",
|
||||
"| 5V CMOS or 5V TTL | 3.3V |\n",
|
||||
"|-------------------|------|\n",
|
||||
"| 5V | 0V |\n",
|
||||
"| 3.6V | 0V |\n",
|
||||
"| 3.3V | 0V |\n",
|
||||
"\n",
|
||||
"- **XC9500 CPLD** \n",
|
||||
" - **IN** \n",
|
||||
" - **OUT** \n",
|
||||
" - **GND** \n",
|
||||
"\n",
|
||||
"www.xilinx.com \n",
|
||||
"DS063 (v6.0) May 17, 2013 \n",
|
||||
"Product Specification\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o.source_nodes[0].get_content())"
|
||||
"print(gemini_documents[0].text)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -11,7 +11,12 @@
|
||||
"\n",
|
||||
"This cookbook shows you how to use LlamaParse to parse any document with the multimodal capabilities of GPT4o-mini.\n",
|
||||
"\n",
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n"
|
||||
"LlamaParse allows you to plug in external, multimodal model vendors for parsing - we handle the error correction, validation, and scalability/reliability for you.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-19-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,24 +29,39 @@
|
||||
"Download the data - the blog post from Meta on Llama3.1, in PDF form."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91a9e532-1454-40e0-bbf0-fd442c350121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d9fb0aa-74cd-476f-8161-efd9e04248bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2025-08-20 09:01:29-- https://www.dropbox.com/scl/fi/8iu23epvv3473im5rq19g/llama3.1_blog.pdf?rlkey=5u417tbdox4aip33fdubvni56&st=dzozd11e&dl=1\n",
|
||||
"Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112\n",
|
||||
"Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 302 Found\n",
|
||||
"Location: https://uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com/cd/0/inline/CvxiobAxsMsABs0DEDrx1mQ4P4l3JsmP2sR43DDeERGKF46mpTn7IFVWd4tKNsnH5ktPFJS_XYJG7jzY4B_-hCc9sXoVRVL74CYo95FjlLfLroFwdAtq-f00E7BrSfVABBwjXltHN2LtIXuyNWsRg0_t/file?dl=1# [following]\n",
|
||||
"--2025-08-20 09:01:29-- https://uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com/cd/0/inline/CvxiobAxsMsABs0DEDrx1mQ4P4l3JsmP2sR43DDeERGKF46mpTn7IFVWd4tKNsnH5ktPFJS_XYJG7jzY4B_-hCc9sXoVRVL74CYo95FjlLfLroFwdAtq-f00E7BrSfVABBwjXltHN2LtIXuyNWsRg0_t/file?dl=1\n",
|
||||
"Resolving uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com (uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com)... 162.125.1.15, 2620:100:6016:15::a27d:10f\n",
|
||||
"Connecting to uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com (uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com)|162.125.1.15|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 302 Found\n",
|
||||
"Location: /cd/0/inline2/CvwV8il1jZEc68KALo74AWW6KpFtSpJtE6pURwe0VPUfy3h8444UzIbiuEzJqt-nrT642eNdWpfhf0cZywophk8xT3g1EZALEaa1NWuV7sqSPm-LwY7uv1PvJW4B8Zx7iyK4zHf6rAV7Z_k6xTaSgtFmQxrrkm6LMOQE1URHDxNUa4gGU_2drLmiEQyZsgHMcN0pHGJMJVNtKTlheHDZkB2ldrqnozKIMIQWjP8f0eWjPLMXKmJtnU19XnwHIKp_cmZ4hsPa06zLovbrkei_40N0r99sfU2mgjQasv2osRfAOIBBQFKSIzJXCHct_QxeVaHSR6wveM9LS0JIK4c1FbPD1zS4NJVReDkuDXvcm23VOCheRyh8lsegV8rNRpOVZd8/file?dl=1 [following]\n",
|
||||
"--2025-08-20 09:01:30-- https://uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com/cd/0/inline2/CvwV8il1jZEc68KALo74AWW6KpFtSpJtE6pURwe0VPUfy3h8444UzIbiuEzJqt-nrT642eNdWpfhf0cZywophk8xT3g1EZALEaa1NWuV7sqSPm-LwY7uv1PvJW4B8Zx7iyK4zHf6rAV7Z_k6xTaSgtFmQxrrkm6LMOQE1URHDxNUa4gGU_2drLmiEQyZsgHMcN0pHGJMJVNtKTlheHDZkB2ldrqnozKIMIQWjP8f0eWjPLMXKmJtnU19XnwHIKp_cmZ4hsPa06zLovbrkei_40N0r99sfU2mgjQasv2osRfAOIBBQFKSIzJXCHct_QxeVaHSR6wveM9LS0JIK4c1FbPD1zS4NJVReDkuDXvcm23VOCheRyh8lsegV8rNRpOVZd8/file?dl=1\n",
|
||||
"Reusing existing connection to uc29796f0b776076192093df7b2d.dl.dropboxusercontent.com:443.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 14191422 (14M) [application/binary]\n",
|
||||
"Saving to: ‘data/llama3.1_blog.pdf’\n",
|
||||
"\n",
|
||||
"data/llama3.1_blog. 100%[===================>] 13.53M 24.4MB/s in 0.6s \n",
|
||||
"\n",
|
||||
"2025-08-20 09:01:31 (24.4 MB/s) - ‘data/llama3.1_blog.pdf’ saved [14191422/14191422]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!wget \"https://www.dropbox.com/scl/fi/8iu23epvv3473im5rq19g/llama3.1_blog.pdf?rlkey=5u417tbdox4aip33fdubvni56&st=dzozd11e&dl=1\" -O \"data/llama3.1_blog.pdf\""
|
||||
]
|
||||
@@ -61,46 +81,7 @@
|
||||
"source": [
|
||||
"## Initialize LlamaParse\n",
|
||||
"\n",
|
||||
"Initialize LlamaParse in multimodal mode, and specify the vendor.\n",
|
||||
"\n",
|
||||
"**NOTE**: optionally you can specify the OpenAI API key. If you do so you will be charged our base LlamaParse price of 0.3c per page. If you don't then you will be charged 1.5c per page, as we will make the calls to gpt4o-mini for you and give you price predictability."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc921729-3446-42ca-8e1b-a6fd26195ed9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import List\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_text_nodes(json_list: List[dict]):\n",
|
||||
" text_nodes = []\n",
|
||||
" for idx, page in enumerate(json_list):\n",
|
||||
" text_node = TextNode(text=page[\"md\"], metadata={\"page\": page[\"page\"]})\n",
|
||||
" text_nodes.append(text_node)\n",
|
||||
" return text_nodes\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def save_jsonl(data_list, filename):\n",
|
||||
" \"\"\"Save a list of dictionaries as JSON Lines.\"\"\"\n",
|
||||
" with open(filename, \"w\") as file:\n",
|
||||
" for item in data_list:\n",
|
||||
" json.dump(item, file)\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def load_jsonl(filename):\n",
|
||||
" \"\"\"Load a list of dictionaries from JSON Lines.\"\"\"\n",
|
||||
" data_list = []\n",
|
||||
" with open(filename, \"r\") as file:\n",
|
||||
" for line in file:\n",
|
||||
" data_list.append(json.loads(line))\n",
|
||||
" return data_list"
|
||||
"Initialize LlamaParse in multimodal mode, and specify the vendor."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -113,7 +94,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id bf3e7341-bb11-42d4-a5f7-bb5260ad792c\n"
|
||||
"Started parsing the file under job_id 5c002568-5fcb-4741-abb2-6cfe598646c1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -121,103 +102,17 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" parse_mode=\"parse_page_with_lvm\",\n",
|
||||
" vendor_multimodal_model_name=\"openai-gpt-4o-mini\",\n",
|
||||
" invalidate_cache=True,\n",
|
||||
" # vendor_multimodal_api_key=\"fake\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" api_key=\"llx-...\",\n",
|
||||
")\n",
|
||||
"json_objs = parser.get_json_result(\"./data/llama3.1_blog.pdf\")\n",
|
||||
"json_list = json_objs[0][\"pages\"]\n",
|
||||
"docs = get_text_nodes(json_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "96a81df0-1026-4e30-a930-f677dc31e344",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs], \"docs.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee2e6920-8893-4b39-ae12-94d13c651406",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_dicts = load_jsonl(\"docs.jsonl\")\n",
|
||||
"docs = [Document.parse_obj(d) for d in docs_dicts]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f3c51b0-7878-48d7-9bc3-02b516500128",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup GPT-4o baseline\n",
|
||||
"\n",
|
||||
"For comparison, we will also parse the document using GPT-4o (3c per page)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fc3f258-50ae-4988-b904-c105463a498f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 391ff280-08e5-4143-85f2-90ada287e26c\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser_gpt4o = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model=\"openai-gpt4o\",\n",
|
||||
" # invalidate_cache=True\n",
|
||||
")\n",
|
||||
"json_objs_gpt4o = parser_gpt4o.get_json_result(\"./data/llama3.1_blog.pdf\")\n",
|
||||
"# json_objs_gpt4o = parser.get_json_result(\"./data/llama2-p33.pdf\")\n",
|
||||
"json_list_gpt4o = json_objs_gpt4o[0][\"pages\"]\n",
|
||||
"docs_gpt4o = get_text_nodes(json_list_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6a47f04e-12e1-4c80-a71d-ef7721f96401",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Save\n",
|
||||
"save_jsonl([d.dict() for d in docs_gpt4o], \"docs_gpt4o.jsonl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c38b5ca3-fa87-434b-b477-bf6a4962eb3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optional: Load\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"docs_gpt4o_dicts = load_jsonl(\"docs_gpt4o.jsonl\")\n",
|
||||
"docs_gpt4o = [Document.parse_obj(d) for d in docs_gpt4o_dicts]"
|
||||
"result = await parser.aparse(\"./data/llama3.1_blog.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -227,11 +122,17 @@
|
||||
"source": [
|
||||
"## View Results\n",
|
||||
"\n",
|
||||
"Let's visualize the results between GPT-4o-mini and GPT-4o along with the original document page.\n",
|
||||
"\n",
|
||||
"We see that \n",
|
||||
"\n",
|
||||
"**NOTE**: If you're using llama2-p33, just use `docs[0]`"
|
||||
"Let's visualize the results with gpt-4o-mini along with the original document page."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "592d82bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = result.get_markdown_documents(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -244,101 +145,54 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 5\n",
|
||||
"page_number: 5\n",
|
||||
"file_name: ./data/llama3.1_blog.pdf\n",
|
||||
"\n",
|
||||
"# Llama 3.1 Model Evaluation\n",
|
||||
" \n",
|
||||
"Introducing Llama 3.1: Our most capable models to date \n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Category Benchmark\n",
|
||||
"# Category Benchmark\n",
|
||||
"\n",
|
||||
"| Benchmark | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mistral 8x228B Instruct | GPT 3.5 Turbo |\n",
|
||||
"|-------------------------------|----------------|----------------------|----------------|-------------------------|----------------|\n",
|
||||
"| General | | | | | |\n",
|
||||
"| MMLU (0-shot, CoT) | 73.0 | 72.3 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| MMLU PRO (5-shot, CoT) | 48.3 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| IFEval | 80.4 | 73.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | | | | | |\n",
|
||||
"| HumanEval (0-shot) | 72.6 | 54.3 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| MBPP EvalPlus (Human) (0-shot, CoT) | 72.8 | 71.7 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | | | | | |\n",
|
||||
"| GSM8K | 84.5 | 76.7 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| MATH (0-shot, CoT) | 51.9 | 44.3 | 70.8 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | | | | | |\n",
|
||||
"| ARC Challenge | 83.4 | 87.6 | 74.2 | 87.7 | 83.7 |\n",
|
||||
"| GPA (0-shot) | 32.8 | 24.8 | 46.7 | 33.3 | 35.8 |\n",
|
||||
"| Tool use | | | | | |\n",
|
||||
"| BFCL | 76.1 | 64.0 | 94.8 | 81.4 | 78.0 |\n",
|
||||
"| Noxus | 38.5 | 30.0 | 24.7 | 48.5 | 37.5 |\n",
|
||||
"| Long context | | | | | |\n",
|
||||
"| ZeroSCROLLS/QualiTY | 81.0 | - | 90.5 | - | - |\n",
|
||||
"| InfiniteBench/En.MC | 65.1 | - | 78.2 | - | - |\n",
|
||||
"| NHI/Multi-needle | 98.8 | - | 97.5 | - | - |\n",
|
||||
"| Multilingual | | | | | |\n",
|
||||
"| MGSM (0-shot) | 68.9 | 53.2 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"| Benchmark | Llama 3.1 8B | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mixtral 8x228 Instruct | GPT 3.5 Turbo |\n",
|
||||
"|-------------------------------|---------------|----------------|---------------------|----------------|------------------------|----------------|\n",
|
||||
"| General | | | | | | |\n",
|
||||
"| MMLU (0-shot, non-CoT) | 73.0 | 72.3 | 60.5 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| MMLU PRO (5-shot, CoT) | 48.3 | 36.9 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| IFEval | 80.4 | 73.6 | 57.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | | | | | | |\n",
|
||||
"| HumanEval (0-shot) | 72.6 | 54.3 | 40.2 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| MBPP EvalPlus (based on CoT) | 72.8 | 71.7 | 49.5 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | | | | | | |\n",
|
||||
"| GSM8K (0-shot, CoT) | 84.5 | 76.7 | 53.2 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| MATH (0-shot, CoT) | 51.9 | 44.3 | 13.0 | 68.0 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | | | | | | |\n",
|
||||
"| ARC Challenge (0-shot) | 83.4 | 87.6 | 74.2 | 94.8 | 88.7 | 83.7 |\n",
|
||||
"| GPA (0-shot) | 32.8 | 28.8 | 28.8 | 46.7 | 33.3 | 30.8 |\n",
|
||||
"| Tool use | | | | | | |\n",
|
||||
"| BFCL | 76.1 | 60.4 | 84.8 | | | 85.9 |\n",
|
||||
"| Nexus | 38.5 | 30.0 | 24.7 | 56.7 | 48.5 | 37.2 |\n",
|
||||
"| Long context | | | | | | |\n",
|
||||
"| ZeroSCROLLS/QualiTY | 81.0 | | 90.5 | | | |\n",
|
||||
"| InfiniteBench/En.MC | 65.1 | | 78.2 | | | |\n",
|
||||
"| NIH/Multi-needle | 98.8 | - | - | 97.5 | - | - |\n",
|
||||
"| Multilingual MGSM (0-shot) | 68.9 | 53.2 | 29.9 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"\n",
|
||||
"## Llama 3.1 405B Human Evaluation\n",
|
||||
"# Llama 3.1 405B Human Evaluation\n",
|
||||
"\n",
|
||||
"| Comparison | Win Rate | Tie Rate | Loss Rate |\n",
|
||||
"|----------------------------------------------|----------|----------|-----------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4o | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using GPT4o-mini\n",
|
||||
"print(docs[4].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1511a30f-3efc-4142-9668-7dc056a24d0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page: 5\n",
|
||||
"\n",
|
||||
"# Introducing Llama 3.1: Our most capable models to date\n",
|
||||
"\n",
|
||||
"## Meta\n",
|
||||
"\n",
|
||||
"| Category | Benchmark | Llama 3.1 8B | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mixtral 8x22B Instruct | GPT 3.5 Turbo |\n",
|
||||
"|----------|-----------|--------------|---------------|---------------------|---------------|-----------------------|---------------|\n",
|
||||
"| General | MMLU (0-shot, CoT) | 73.0 | 72.3 (0-shot, non-CoT) | 60.5 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| | MMLU PRO (5-shot, CoT) | 48.3 | 71.7 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| | ITEval | 80.4 | 73.6 | 57.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | HumanEval (0-shot) | 72.6 | 54.3 | 40.2 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| | MBPP EvalPlus (5-shot) (0-shot) | 72.8 | 71.7 | 49.5 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | GSM8K | 84.5 | 76.7 | 53.2 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| | MATH (0-shot, CoT) | 51.9 | 44.3 | 13.0 | 68.0 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | ARC Challenge (0-shot) | 83.4 | 87.6 | 74.2 | 94.8 | 88.7 | 83.7 |\n",
|
||||
"| | GOPA (0-shot) | 32.8 | 40.8 | 28.0 | 46.7 | - | - |\n",
|
||||
"| Tool use | BFCL | 76.1 | 60.3 | 60.4 | 94.8 | - | 85.9 |\n",
|
||||
"| | Noxus | 38.5 | 30.0 | 24.7 | 56.7 | 48.5 | 37.2 |\n",
|
||||
"| Long context | ZeroSCROLLS/QuaLITY | 81.0 | - | - | 90.5 | - | - |\n",
|
||||
"| | InfiniteBench/En.MC | 65.1 | - | - | 78.2 | - | - |\n",
|
||||
"| | NIH/Multi-needle | 98.8 | - | - | 97.5 | - | - |\n",
|
||||
"| Multilingual | Multilingual MGSM (0-shot) | 68.9 | 53.2 | 29.9 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"\n",
|
||||
"## Llama 3.1 405B Human Evaluation\n",
|
||||
"\n",
|
||||
"| Model Comparison | Win | Tie | Loss |\n",
|
||||
"|------------------|-----|-----|------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4o | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n",
|
||||
"| Comparison | Win | Tie | Loss |\n",
|
||||
"|------------------------------------------------|-------|-------|--------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4 | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"https://ai.meta.com/blog/meta-llama-3-1/\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# using GPT-4o\n",
|
||||
"print(docs_gpt4o[4].get_content(metadata_mode=\"all\"))"
|
||||
"print(documents[4].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -350,7 +204,7 @@
|
||||
"\n",
|
||||
"Let's setup a RAG pipeline over this data.\n",
|
||||
"\n",
|
||||
"(we also use gpt4o-mini for the actual text synthesis step)."
|
||||
"(we also use gpt-5-mini for the actual text synthesis step)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -364,8 +218,8 @@
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"Settings.llm = OpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")"
|
||||
"Settings.llm = OpenAI(model=\"gpt-5-mini\", api_key=\"sk-...\")\n",
|
||||
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\", api_key=\"sk-...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -375,15 +229,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from llama_index.core import SummaryIndex\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"index = VectorStoreIndex(docs)\n",
|
||||
"query_engine = index.as_query_engine(similarity_top_k=5)\n",
|
||||
"\n",
|
||||
"index_gpt4o = VectorStoreIndex(docs_gpt4o)\n",
|
||||
"query_engine_gpt4o = index_gpt4o.as_query_engine(similarity_top_k=5)"
|
||||
"index = VectorStoreIndex.from_documents(documents)\n",
|
||||
"query_engine = index.as_query_engine(similarity_top_k=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -395,8 +244,7 @@
|
||||
"source": [
|
||||
"query = \"How does Llama3.1 compare against gpt-4o and Claude 3.5 Sonnet in human evals?\"\n",
|
||||
"\n",
|
||||
"response = query_engine.query(query)\n",
|
||||
"response_gpt4o = query_engine_gpt4o.query(query)"
|
||||
"response = query_engine.query(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -409,7 +257,13 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In human evaluations, Llama 3.1 405B has a win rate of 19.1% against GPT-4o and 24.9% against Claude 3.5 Sonnet. The tie rates for Llama 3.1 405B are 51.7% against GPT-4o and 50.8% against Claude 3.5 Sonnet, while the loss rates are 29.2% against GPT-4o and 24.2% against Claude 3.5 Sonnet. This indicates that Llama 3.1 performs competitively in comparison to both models, with a notable number of ties.\n"
|
||||
"Reported human-evaluation results for Llama 3.1 (405B):\n",
|
||||
"\n",
|
||||
"- vs GPT-4-0125-Preview: Win 23.3%, Tie 52.2%, Loss 24.5% \n",
|
||||
"- vs GPT-4: Win 19.1%, Tie 51.7%, Loss 29.2% \n",
|
||||
"- vs Claude 3.5 Sonnet: Win 24.9%, Tie 50.8%, Loss 24.2%\n",
|
||||
"\n",
|
||||
"There are no separate head-to-head human-eval numbers published specifically for GPT‑4o in the reported results.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -420,128 +274,65 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bee8167-f021-4c87-8d28-9f40a4f7b69d",
|
||||
"id": "1200c9c0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# Llama 3.1 Model Evaluation\n",
|
||||
"Introducing Llama 3.1: Our most capable models to date \n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## Category Benchmark\n",
|
||||
"# Category Benchmark\n",
|
||||
"\n",
|
||||
"| Benchmark | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mistral 8x228B Instruct | GPT 3.5 Turbo |\n",
|
||||
"|-------------------------------|----------------|----------------------|----------------|-------------------------|----------------|\n",
|
||||
"| General | | | | | |\n",
|
||||
"| MMLU (0-shot, CoT) | 73.0 | 72.3 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| MMLU PRO (5-shot, CoT) | 48.3 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| IFEval | 80.4 | 73.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | | | | | |\n",
|
||||
"| HumanEval (0-shot) | 72.6 | 54.3 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| MBPP EvalPlus (Human) (0-shot, CoT) | 72.8 | 71.7 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | | | | | |\n",
|
||||
"| GSM8K | 84.5 | 76.7 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| MATH (0-shot, CoT) | 51.9 | 44.3 | 70.8 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | | | | | |\n",
|
||||
"| ARC Challenge | 83.4 | 87.6 | 74.2 | 87.7 | 83.7 |\n",
|
||||
"| GPA (0-shot) | 32.8 | 24.8 | 46.7 | 33.3 | 35.8 |\n",
|
||||
"| Tool use | | | | | |\n",
|
||||
"| BFCL | 76.1 | 64.0 | 94.8 | 81.4 | 78.0 |\n",
|
||||
"| Noxus | 38.5 | 30.0 | 24.7 | 48.5 | 37.5 |\n",
|
||||
"| Long context | | | | | |\n",
|
||||
"| ZeroSCROLLS/QualiTY | 81.0 | - | 90.5 | - | - |\n",
|
||||
"| InfiniteBench/En.MC | 65.1 | - | 78.2 | - | - |\n",
|
||||
"| NHI/Multi-needle | 98.8 | - | 97.5 | - | - |\n",
|
||||
"| Multilingual | | | | | |\n",
|
||||
"| MGSM (0-shot) | 68.9 | 53.2 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"| Benchmark | Llama 3.1 8B | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mixtral 8x228 Instruct | GPT 3.5 Turbo |\n",
|
||||
"|-------------------------------|---------------|----------------|---------------------|----------------|------------------------|----------------|\n",
|
||||
"| General | | | | | | |\n",
|
||||
"| MMLU (0-shot, non-CoT) | 73.0 | 72.3 | 60.5 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| MMLU PRO (5-shot, CoT) | 48.3 | 36.9 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| IFEval | 80.4 | 73.6 | 57.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | | | | | | |\n",
|
||||
"| HumanEval (0-shot) | 72.6 | 54.3 | 40.2 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| MBPP EvalPlus (based on CoT) | 72.8 | 71.7 | 49.5 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | | | | | | |\n",
|
||||
"| GSM8K (0-shot, CoT) | 84.5 | 76.7 | 53.2 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| MATH (0-shot, CoT) | 51.9 | 44.3 | 13.0 | 68.0 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | | | | | | |\n",
|
||||
"| ARC Challenge (0-shot) | 83.4 | 87.6 | 74.2 | 94.8 | 88.7 | 83.7 |\n",
|
||||
"| GPA (0-shot) | 32.8 | 28.8 | 28.8 | 46.7 | 33.3 | 30.8 |\n",
|
||||
"| Tool use | | | | | | |\n",
|
||||
"| BFCL | 76.1 | 60.4 | 84.8 | | | 85.9 |\n",
|
||||
"| Nexus | 38.5 | 30.0 | 24.7 | 56.7 | 48.5 | 37.2 |\n",
|
||||
"| Long context | | | | | | |\n",
|
||||
"| ZeroSCROLLS/QualiTY | 81.0 | | 90.5 | | | |\n",
|
||||
"| InfiniteBench/En.MC | 65.1 | | 78.2 | | | |\n",
|
||||
"| NIH/Multi-needle | 98.8 | - | - | 97.5 | - | - |\n",
|
||||
"| Multilingual MGSM (0-shot) | 68.9 | 53.2 | 29.9 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"\n",
|
||||
"## Llama 3.1 405B Human Evaluation\n",
|
||||
"# Llama 3.1 405B Human Evaluation\n",
|
||||
"\n",
|
||||
"| Comparison | Win Rate | Tie Rate | Loss Rate |\n",
|
||||
"|----------------------------------------------|----------|----------|-----------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4o | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response.source_nodes[1].get_content())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f9fef7f-510b-46a5-8716-f5616f542035",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In human evaluations, Llama 3.1 405B shows competitive performance against GPT-4o and Claude 3.5 Sonnet. Specifically, when compared to GPT-4o, Llama 3.1 won 19.1% of the time, tied 51.7%, and lost 29.2%. Against Claude 3.5 Sonnet, it won 24.9% of the time, tied 50.8%, and lost 24.2%. This indicates that Llama 3.1 performs comparably in real-world scenarios against these leading models.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d40f9dd4-2dd4-4fa5-b636-1f901dc1601b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# Introducing Llama 3.1: Our most capable models to date\n",
|
||||
"\n",
|
||||
"## Meta\n",
|
||||
"\n",
|
||||
"| Category | Benchmark | Llama 3.1 8B | Gemma 2 9B IT | Mistral 7B Instruct | Llama 3.1 70B | Mixtral 8x22B Instruct | GPT 3.5 Turbo |\n",
|
||||
"|----------|-----------|--------------|---------------|---------------------|---------------|-----------------------|---------------|\n",
|
||||
"| General | MMLU (0-shot, CoT) | 73.0 | 72.3 (0-shot, non-CoT) | 60.5 | 86.0 | 79.9 | 69.8 |\n",
|
||||
"| | MMLU PRO (5-shot, CoT) | 48.3 | 71.7 | 36.9 | 66.4 | 56.3 | 49.2 |\n",
|
||||
"| | ITEval | 80.4 | 73.6 | 57.6 | 87.5 | 72.7 | 69.9 |\n",
|
||||
"| Code | HumanEval (0-shot) | 72.6 | 54.3 | 40.2 | 80.5 | 75.6 | 68.0 |\n",
|
||||
"| | MBPP EvalPlus (5-shot) (0-shot) | 72.8 | 71.7 | 49.5 | 86.0 | 78.6 | 82.0 |\n",
|
||||
"| Math | GSM8K | 84.5 | 76.7 | 53.2 | 95.1 | 88.2 | 81.6 |\n",
|
||||
"| | MATH (0-shot, CoT) | 51.9 | 44.3 | 13.0 | 68.0 | 54.1 | 43.1 |\n",
|
||||
"| Reasoning | ARC Challenge (0-shot) | 83.4 | 87.6 | 74.2 | 94.8 | 88.7 | 83.7 |\n",
|
||||
"| | GOPA (0-shot) | 32.8 | 40.8 | 28.0 | 46.7 | - | - |\n",
|
||||
"| Tool use | BFCL | 76.1 | 60.3 | 60.4 | 94.8 | - | 85.9 |\n",
|
||||
"| | Noxus | 38.5 | 30.0 | 24.7 | 56.7 | 48.5 | 37.2 |\n",
|
||||
"| Long context | ZeroSCROLLS/QuaLITY | 81.0 | - | - | 90.5 | - | - |\n",
|
||||
"| | InfiniteBench/En.MC | 65.1 | - | - | 78.2 | - | - |\n",
|
||||
"| | NIH/Multi-needle | 98.8 | - | - | 97.5 | - | - |\n",
|
||||
"| Multilingual | Multilingual MGSM (0-shot) | 68.9 | 53.2 | 29.9 | 86.9 | 71.1 | 51.4 |\n",
|
||||
"\n",
|
||||
"## Llama 3.1 405B Human Evaluation\n",
|
||||
"\n",
|
||||
"| Model Comparison | Win | Tie | Loss |\n",
|
||||
"|------------------|-----|-----|------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4o | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n",
|
||||
"| Comparison | Win | Tie | Loss |\n",
|
||||
"|------------------------------------------------|-------|-------|--------|\n",
|
||||
"| Llama 3.1 405B vs GPT-4-0125-Preview | 23.3% | 52.2% | 24.5% |\n",
|
||||
"| Llama 3.1 405B vs GPT-4 | 19.1% | 51.7% | 29.2% |\n",
|
||||
"| Llama 3.1 405B vs Claude 3.5 Sonnet | 24.9% | 50.8% | 24.2% |\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"https://ai.meta.com/blog/meta-llama-3-1/\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response_gpt4o.source_nodes[1].get_content())"
|
||||
"print(response.source_nodes[0].text)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -15,6 +15,11 @@
|
||||
"source": [
|
||||
"This cookbook shows how to use LlamaParse and OpenAI's multimodal GPT-4o model to parse auto insurance claim documents that contain complex tabular data. In this example, we will use an auto insurance claim template form, which contains complex tabular inputs regarding information about the location of the accident, accident description, information about vehicles of both parties, and injury information. The template is shown below.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb 2025 | N/A | Deprecated |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This example demonstrates how LlamaParse can be used on insurance documents, which often contain complex tabular data. We parse these tabular PDF files into markdown-formatted tables, which can be indexed and queried over with a `VectorStoreIndex`. This can help insurance companies accelerate the process of gathering information about car accidents from insurance claim documents."
|
||||
@@ -35,7 +40,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index"
|
||||
"%pip install \"llama-index>=0.13.0,<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Install LlamaIndex, download the data, and apply `nest_asyncio`."
|
||||
"Install LlamaIndex, download the data, and set your API keys."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,7 +43,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-parse"
|
||||
"%pip install \"llama-index>=0.13.0,<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -57,17 +57,6 @@
|
||||
"!rm data.zip"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -83,8 +72,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"<Your OpenAI API Key>\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"<Your LlamaCloud API Key>\""
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -105,11 +94,12 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" parsing_instruction=\"Provided are a series of US legal documents.\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model_name=\"openai-gpt4o\",\n",
|
||||
" show_progress=True,\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"DATA_DIR = \"data\"\n",
|
||||
@@ -143,22 +133,117 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Parsing files: 100%|██████████| 8/8 [01:25<00:00, 10.67s/it]\n"
|
||||
"Getting job results: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id dad7b215-360c-46a6-857e-983249441395\n",
|
||||
"Started parsing the file under job_id bcfb24fb-0b30-4bd5-a87d-2a81b2d4298a\n",
|
||||
"Started parsing the file under job_id 50417384-e3fa-44fa-9f58-8344c129cedf\n",
|
||||
"Started parsing the file under job_id 49b0620f-e9fa-4736-801f-aadd6d6e21dd\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 12%|█▎ | 1/8 [00:23<02:43, 23.42s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 729ceca5-2940-406d-b29a-0252dbf11e15\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 38%|███▊ | 3/8 [00:41<00:56, 11.20s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 0733a9c5-d4a6-4242-9bd2-f61e931424dd\n",
|
||||
"Started parsing the file under job_id a948a2f8-521a-412a-9cbd-4574814a8d2c\n",
|
||||
"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 50%|█████ | 4/8 [00:44<00:32, 8.19s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id d9929a63-4f84-4567-abd9-bc352eee1db0\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 75%|███████▌ | 6/8 [01:07<00:19, 9.70s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"...."
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 88%|████████▊ | 7/8 [02:47<00:39, 39.42s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 100%|██████████| 8/8 [03:32<00:00, 26.61s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents = parser.load_data(\n",
|
||||
" files,\n",
|
||||
" extra_info={\"name\": \"US legal documents provided by the Library of Congress.\"},\n",
|
||||
")"
|
||||
"results = await parser.aparse(files)\n",
|
||||
"\n",
|
||||
"documents = []\n",
|
||||
"for result in results:\n",
|
||||
" documents.extend(result.get_markdown_documents(split_by_page=True))\n",
|
||||
"\n",
|
||||
"for document in documents:\n",
|
||||
" document.metadata[\n",
|
||||
" \"context\"\n",
|
||||
" ] = \"US legal documents provided by the Library of Congress.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Setup LlamaIndex. Set the default LLM to GPT-4o (a multi-modal model), and create an index from the documents, and persist these documents to disk. If these documents have already been persisted, then load index from the persisted docs."
|
||||
"Set up LlamaIndex for querying the data using RAG."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -169,25 +254,18 @@
|
||||
"source": [
|
||||
"from llama_index.core import (\n",
|
||||
" VectorStoreIndex,\n",
|
||||
" StorageContext,\n",
|
||||
" load_index_from_storage,\n",
|
||||
" Settings,\n",
|
||||
")\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")\n",
|
||||
"llm = OpenAI(\"gpt-4o\")\n",
|
||||
"llm = OpenAI(\"gpt-5-mini\")\n",
|
||||
"\n",
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = embed_model\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\"storage_legal\"):\n",
|
||||
" index = VectorStoreIndex(documents, embed_model=embed_model)\n",
|
||||
" index.storage_context.persist(persist_dir=\"./storage_legal\")\n",
|
||||
"else:\n",
|
||||
" ctx = StorageContext.from_defaults(persist_dir=\"./storage_legal\")\n",
|
||||
" index = load_index_from_storage(ctx)\n",
|
||||
"index = VectorStoreIndex.from_documents(documents)\n",
|
||||
"\n",
|
||||
"query_engine = index.as_query_engine()"
|
||||
]
|
||||
@@ -207,7 +285,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The majority of Barre Savings Bank's loans went to residential real estate, specifically 1-4 family mortgages, which accounted for 78.7 percent of the total loans."
|
||||
"The majority went to residential real estate lending—primarily 1–4 family mortgages (about 78.7% of loans, with home equity lines adding another 8.7%, for a total of 87.4%)."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -234,7 +312,12 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"Mr. Kubarych believes foreign markets are important because they are attractive to foreign investors for the same reasons they are attractive to Americans. The economic data is strong, and the high tech boom has created a positive perception that overshadows longer-term vulnerabilities. Additionally, foreign investors have high expectations for the U.S. to maintain a firm monetary policy in response to inflation and to act as a superpower rather than pursuing narrow nationalist economic policies."
|
||||
"He says foreign markets (especially U.S. markets) are attractive because:\n",
|
||||
"- The underlying economic data are strong.\n",
|
||||
"- The high‑tech boom creates a “halo” that attracts attention and investment.\n",
|
||||
"- There is broad, nearly bipartisan political/economic stability.\n",
|
||||
"- Foreign investors expect sensible foreign‑policy behavior and a firm monetary policy response to any rise in inflation.\n",
|
||||
"- Large foreign institutions (investment funds, insurers, banks) therefore see the markets as a safe, desirable place to put money."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -259,7 +342,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"House Speaker Nancy Pelosi and the Democratic majority are against the proposal of offshore drilling in California. Pelosi stated that offshore drilling is \"off the table,\" and Democrats have been consistently unwilling to bend environmental rules. They argue that oil companies are not using the 68 million acres of federal lands already leased to them, either because it takes a long time or they lack the necessary equipment."
|
||||
"House Democrats — including Speaker Nancy Pelosi — and other Democratic lawmakers oppose drilling off the California coast. They say it should be \"off the table\" for environmental reasons, point out that there are already millions of acres of federal lands leased to oil companies that aren’t being developed, and note oil companies have told Pelosi those leases aren’t being used because development takes a long time or the companies lack the equipment. No Democrats signed on to the proposed bill."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -284,7 +367,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The purpose of the Ocean Science and Technology Subcommittee (SOST) is to advise and assist the Committee on Environment, Natural Resources, and Sustainability on national issues of ocean science and technology. The SOST aims to contribute to the goals for Federal ocean science and technology by developing coordinated interagency strategies. It also retains the functions of the previously-chartered Joint Subcommittee on Ocean Science and Technology and serves as the Ocean Science and Technology Interagency Policy Committee for the National Ocean Council."
|
||||
"To advise and assist the Committee on Environment, Natural Resources, and Sustainability on national ocean science and technology issues and to advance federal ocean S&T goals by developing coordinated interagency strategies. It also serves as the National Ocean Council’s Ocean Science and Technology Interagency Policy Committee and retains the mandated functions of the prior joint subcommittee. Key roles include fostering national ocean S&T priorities; facilitating interagency coordination of research, technology, infrastructure, education, and observation/mapping programs; expanding fundamental knowledge of the ocean and its links to the Earth system and society; advancing modeling and forecasting; advising on science and technology for ecosystem-based management and stewardship; supporting use of ocean S&T in coastal and marine policy; and recommending scientific and technical assessments."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -309,7 +392,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The immigration appeal is dismissed because the petitioner is not a U.S. citizen, and therefore, is not eligible to file a Petition for Alien Fiancé(e) (Form I-129F) on behalf of the beneficiary. The relevant law provides nonimmigrant classification only to aliens who are the fiancé(e)s of U.S. citizens."
|
||||
"The appeal was dismissed because the petitioner is not a U.S. citizen, and the K‑1 fiancé(e) classification (Form I‑129F) is available only for fiancés of U.S. citizens. The denial is without prejudice, so the petitioner may file a new I‑129F if he becomes a U.S. citizen."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -332,7 +415,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"An advance pricing agreement (APA) is a binding contract between a taxpayer and the IRS that establishes an approved transfer pricing method (TPM) for specific transactions. This agreement aims to prevent disputes over transfer pricing by ensuring that the taxpayer's tax returns for the covered years are consistent with the agreed TPM. APAs can be unilateral, involving only the taxpayer and the IRS, or bilateral/multilateral, involving agreements with one or more foreign tax authorities to avoid double taxation."
|
||||
"An advance pricing agreement (APA) is a binding contract between a taxpayer and the IRS that establishes an approved transfer pricing method (TPM) for specified related‑party (covered) transactions and tax years. If the taxpayer files its returns consistent with the agreed TPM, the IRS agrees not to seek an adjustment under IRC § 482 for those transactions. An APA can be unilateral (between the taxpayer and the IRS) or bilateral/multilateral (also agreeing with one or more foreign competent authorities), and is intended to resolve transfer‑pricing disputes in advance and, where bilateral, to reduce the risk of double taxation."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -350,7 +433,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-5ZmnAQ0r-py3.11",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -15,10 +15,15 @@
|
||||
"\n",
|
||||
"These LLM calls are expensive. Contextual retrieval depends on **prompt caching** in order to be efficient.\n",
|
||||
"\n",
|
||||
"In this notebook, we use Claude 3.5-Sonnet to generate contextual summaries. We cache the document as text tokens, but generate contextual summaries by feeding in the parsed text chunk. \n",
|
||||
"In this notebook, we use Claude 3.5-Haiku to generate contextual summaries. We cache the document as text tokens, but generate contextual summaries by feeding in the parsed text chunk. \n",
|
||||
"\n",
|
||||
"We feed both the text and image chunks into the final multimodal RAG pipeline to generate the response.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-20-2025 | 0.6.61 | Maintained |\n",
|
||||
"\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
@@ -33,13 +38,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70ccdd53-e68a-4199-aacb-cfe71ad1ff0b",
|
||||
"id": "155afa97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
"%pip install llama-cloud-services \"llama-index>=0.13.0,<0.14.0\" llama-index-embeddings-voyageai llama-index-llms-anthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,7 +50,7 @@
|
||||
"id": "225c5556-a789-4386-a1ee-cce01dbeb6cf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup Observability\n",
|
||||
"### (Optional) Setup Observability\n",
|
||||
"\n",
|
||||
"We set up an integration with LlamaTrace (integration with Arize).\n",
|
||||
"\n",
|
||||
@@ -126,7 +129,9 @@
|
||||
"# replace with your Anthropic API key\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = \"sk-...\"\n",
|
||||
"# replace with your VoyageAI key\n",
|
||||
"os.environ[\"VOYAGE_API_KEY\"] = \"\""
|
||||
"os.environ[\"VOYAGE_API_KEY\"] = \"pa-...\"\n",
|
||||
"# replace with your LlamaCloud API key\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -134,15 +139,24 @@
|
||||
"execution_count": null,
|
||||
"id": "16e2071d-bbc2-4707-8ae7-cb4e1fecafd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/loganmarkewich/llama_parse/py/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_index.llms.anthropic import Anthropic\n",
|
||||
"from llama_index.embeddings.voyageai import VoyageEmbedding\n",
|
||||
"from llama_index.core import Settings\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = Anthropic(model=\"claude-3-5-sonnet-20240620\")\n",
|
||||
"embed_model = VoyageEmbedding(model_name=\"voyage-3\")\n",
|
||||
"llm = Anthropic(model=\"claude-4-sonnet-20250514\")\n",
|
||||
"embed_model = VoyageEmbedding(model_name=\"voyage-3.5\")\n",
|
||||
"\n",
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = embed_model"
|
||||
@@ -173,9 +187,12 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" premium_mode=True,\n",
|
||||
" # invalidate_cache=True\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -189,15 +206,12 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Parsing text...\n",
|
||||
"Started parsing the file under job_id a578c42a-706c-4fc8-8f60-231bc2fca434\n"
|
||||
"Started parsing the file under job_id 1384d483-16c8-4b20-a3ff-6863eafecbc1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Parsing text...\")\n",
|
||||
"md_json_objs = parser.get_json_result(\"data/iconiq_report.pdf\")\n",
|
||||
"md_json_list = md_json_objs[0][\"pages\"]"
|
||||
"results = await parser.aparse(\"data/iconiq_report.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,50 +224,80 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"# A Decision-Making Framework\n",
|
||||
"\n",
|
||||
"When making decisions around GenAI investments, we believe it will be important to assess organization readiness, put in place a framework and processes for use case evaluation, and proactively mitigate risks\n",
|
||||
"\n",
|
||||
"## Accelerate Value\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"### Accelerate Value \n",
|
||||
"Find synergies between organizational readiness, use cases, and risk mitigation when making GenAI investment decisions\n",
|
||||
"\n",
|
||||
"### Use Case Identification & Evaluation\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"### Use Case Identification & Evaluation \n",
|
||||
"When determining use cases for GenAI, we believe stakeholders will need to assess business value, the fluency vs. accuracy of solutions, and the level of risk associated. Given the risks involved with using GenAI to build new products, many organizations are first starting with use cases for internal productivity.\n",
|
||||
"\n",
|
||||
"It is also important to implement feedback loops and a system for measuring ROI to evaluate use cases.\n",
|
||||
"\n",
|
||||
"### Organizational Readiness\n",
|
||||
"For enterprises adopting GenAI solutions for the first time, we believe it will be important to ensure various components of the organization are ready to support the development and integration needs involved. Organizational readiness components to assess could include:\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"- Employee readiness and training\n",
|
||||
"- IT / data team expertise\n",
|
||||
"- Security\n",
|
||||
"- Governance structure and policies\n",
|
||||
"- Data ecosystem maturity\n",
|
||||
"### Organizational Readiness \n",
|
||||
"For enterprises adopting GenAI solutions for the first time, we believe it will be important to ensure various components of the organization are ready to support the development and integration needs involved. \n",
|
||||
"Organizational readiness components to assess could include:\n",
|
||||
"\n",
|
||||
"### Risk Mitigation\n",
|
||||
"* Employee readiness and training \n",
|
||||
"* IT / data team expertise \n",
|
||||
"* Security \n",
|
||||
"* Governance structure and policies \n",
|
||||
"* Data ecosystem maturity \n",
|
||||
"\n",
|
||||
"----\n",
|
||||
"\n",
|
||||
"### Risk Mitigation \n",
|
||||
"We believe enterprises will need to account for various risks like data security and privacy concerns, algorithm accuracy / bias, integration complexity, etc. when evaluating GenAI solutions.\n",
|
||||
"\n",
|
||||
"Organizations can employ various strategies to mitigate some of these risks. For example, it may make sense to invest in fine-tuning or retrieval augmented generation (RAG) techniques to mitigate concerns of model accuracy.\n",
|
||||
"\n",
|
||||
"Source: Perspectives from the ICONIQ Growth GenAI Survey (June 2024) and perspectives from the ICONIQ Growth team and network of AI leaders consisting of our community of CIO/CDOs overseeing AI initiatives in enterprises, CTOs, our Technical Advisory Board, and others in our network\n",
|
||||
"\n",
|
||||
"Private & Strictly Confidential\n"
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(md_json_list[10][\"md\"])"
|
||||
"print(results.pages[10].md)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d50913fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can download the page screenshots directly, and we can use them as context later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eeadb16c-97eb-4622-9551-b34d7f90d72f",
|
||||
"id": "056ba139",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_dicts = parser.get_images(md_json_objs, download_path=\"data_images_iconiq\")"
|
||||
"image_nodes = await results.aget_image_nodes(\n",
|
||||
" include_object_images=False,\n",
|
||||
" include_screenshot_images=True,\n",
|
||||
" image_download_dir=\"./iconiq_images\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cda70ede",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_nodes = results.get_markdown_nodes(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -270,52 +314,6 @@
|
||||
"In this example we're indexing the text node for retrieval. The text node has a reference to both the parsed text as well as the image screenshot."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3aae2dee-9d85-4604-8a51-705d4db527f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get Text Nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18c24174-05ce-417f-8dd2-79c3f375db03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import Optional"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e331dfe-a627-4e23-8c57-70ab1d9342e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get pages loaded through llamaparse\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_page_number(file_name):\n",
|
||||
" match = re.search(r\"-page_(\\d+)\\.jpg$\", str(file_name))\n",
|
||||
" if match:\n",
|
||||
" return int(match.group(1))\n",
|
||||
" return 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_sorted_image_files(image_dir):\n",
|
||||
" \"\"\"Get image files sorted by page.\"\"\"\n",
|
||||
" raw_files = [\n",
|
||||
" f for f in list(Path(image_dir).iterdir()) if f.is_file() and \"-page\" in str(f)\n",
|
||||
" ]\n",
|
||||
" sorted_files = sorted(raw_files, key=get_page_number)\n",
|
||||
" return sorted_files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -323,40 +321,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from copy import deepcopy\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# attach image metadata to the text nodes\n",
|
||||
"def get_text_nodes(image_dir, json_dicts):\n",
|
||||
" \"\"\"Split docs into nodes, by separator.\"\"\"\n",
|
||||
" nodes = []\n",
|
||||
"\n",
|
||||
" image_files = _get_sorted_image_files(image_dir)\n",
|
||||
" md_texts = [d[\"md\"] for d in json_dicts]\n",
|
||||
"\n",
|
||||
" for idx, md_text in enumerate(md_texts):\n",
|
||||
" chunk_metadata = {\"page_num\": idx + 1}\n",
|
||||
" chunk_metadata[\"image_path\"] = str(image_files[idx])\n",
|
||||
" chunk_metadata[\"parsed_text_markdown\"] = md_texts[idx]\n",
|
||||
" node = TextNode(\n",
|
||||
" text=\"\",\n",
|
||||
" metadata=chunk_metadata,\n",
|
||||
" )\n",
|
||||
" nodes.append(node)\n",
|
||||
"\n",
|
||||
" return nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f591669c-5a8e-491d-9cef-0b754abbf26f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# this will split into pages\n",
|
||||
"text_nodes = get_text_nodes(image_dir=\"data_images_iconiq\", json_dicts=md_json_list)"
|
||||
"for text_node, image_node in zip(text_nodes, image_nodes):\n",
|
||||
" text_node.metadata[\"image_path\"] = image_node.image_path"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -369,19 +335,18 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_num: 1\n",
|
||||
"image_path: data_images_iconiq/11f19cc3-c02e-4271-a84f-9a043457fd69-page_1.jpg\n",
|
||||
"parsed_text_markdown: September 2024\n",
|
||||
"page_number: 1\n",
|
||||
"file_name: data/iconiq_report.pdf\n",
|
||||
"image_path: iconiq_images/page_1.jpg\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# The State of AI\n",
|
||||
"\n",
|
||||
"Navigating the present and promise\n",
|
||||
"of Generative AI\n",
|
||||
"September 2024\n",
|
||||
"\n",
|
||||
"ICONIQ | Growth\n",
|
||||
"Navigating the present and promise of Generative AI\n",
|
||||
"\n",
|
||||
"Private and Strictly Confidential\n",
|
||||
"Copyright © 2024 ICONIQ Capital, LLC. All Rights Reserved\n"
|
||||
"ICONIQ | Growth\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -409,8 +374,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from copy import deepcopy\n",
|
||||
"from llama_index.core.llms import ChatMessage\n",
|
||||
"from llama_index.core.prompts import ChatPromptTemplate\n",
|
||||
"from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, CachePoint\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -424,8 +388,9 @@
|
||||
"Here is the chunk we want to situate within the whole document\n",
|
||||
"<chunk>\n",
|
||||
"{CHUNK_CONTENT}\n",
|
||||
"</chunk>\n",
|
||||
"Please give a short succinct context to situate this chunk within the overall document for \\\n",
|
||||
"</chunk>\"\"\"\n",
|
||||
"\n",
|
||||
"suffix_text = \"\"\"Please give a short succinct context to situate this chunk within the overall document for \\\n",
|
||||
"the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -441,28 +406,26 @@
|
||||
" new_node = deepcopy(node)\n",
|
||||
"\n",
|
||||
" messages = [\n",
|
||||
" ChatMessage(role=\"system\", content=\"You are a helpful AI Assistant.\"),\n",
|
||||
" ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" content=[\n",
|
||||
" {\n",
|
||||
" \"text\": whole_doc_text.format(WHOLE_DOCUMENT=doc_text),\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"cache_control\": {\"type\": \"ephemeral\"},\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"text\": chunk_text.format(\n",
|
||||
" blocks=[\n",
|
||||
" TextBlock(text=whole_doc_text.format(WHOLE_DOCUMENT=doc_text)),\n",
|
||||
" CachePoint(cache_control={\"type\": \"ephemeral\"}),\n",
|
||||
" TextBlock(\n",
|
||||
" text=chunk_text.format(\n",
|
||||
" CHUNK_CONTENT=node.get_content(metadata_mode=\"all\")\n",
|
||||
" ),\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" ),\n",
|
||||
" TextBlock(\n",
|
||||
" text=\"And here is the page screenshot for the corresponding chunk:\"\n",
|
||||
" ),\n",
|
||||
" ImageBlock(path=node.metadata[\"image_path\"]),\n",
|
||||
" TextBlock(text=suffix_text),\n",
|
||||
" ],\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" new_response = llm.chat(\n",
|
||||
" messages, extra_headers={\"anthropic-beta\": \"prompt-caching-2024-07-31\"}\n",
|
||||
" )\n",
|
||||
" new_response = llm.chat(messages)\n",
|
||||
" new_node.metadata[\"context\"] = str(new_response)\n",
|
||||
"\n",
|
||||
" nodes_modified.append(new_node)\n",
|
||||
@@ -481,52 +444,54 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Completed node 0, 3.079681158065796\n",
|
||||
"Completed node 1, 2.306105136871338\n",
|
||||
"Completed node 2, 2.9272632598876953\n",
|
||||
"Completed node 3, 2.7051072120666504\n",
|
||||
"Completed node 4, 2.5174269676208496\n",
|
||||
"Completed node 5, 2.593230962753296\n",
|
||||
"Completed node 6, 17.79446506500244\n",
|
||||
"Completed node 7, 2.357940912246704\n",
|
||||
"Completed node 8, 22.41524910926819\n",
|
||||
"Completed node 9, 2.3640670776367188\n",
|
||||
"Completed node 10, 24.634361743927002\n",
|
||||
"Completed node 11, 3.069308042526245\n",
|
||||
"Completed node 12, 23.27754497528076\n",
|
||||
"Completed node 13, 3.3801419734954834\n",
|
||||
"Completed node 14, 22.186962842941284\n",
|
||||
"Completed node 15, 2.9594428539276123\n",
|
||||
"Completed node 16, 22.680989027023315\n",
|
||||
"Completed node 17, 2.8793280124664307\n",
|
||||
"Completed node 18, 22.91075611114502\n",
|
||||
"Completed node 19, 2.824723958969116\n",
|
||||
"Completed node 20, 23.572262287139893\n",
|
||||
"Completed node 21, 2.9115028381347656\n",
|
||||
"Completed node 22, 22.8908531665802\n",
|
||||
"Completed node 23, 2.2966439723968506\n",
|
||||
"Completed node 24, 23.58935308456421\n",
|
||||
"Completed node 25, 2.6247501373291016\n",
|
||||
"Completed node 26, 22.399968147277832\n",
|
||||
"Completed node 27, 3.0899431705474854\n",
|
||||
"Completed node 28, 22.961134910583496\n",
|
||||
"Completed node 29, 3.1315767765045166\n",
|
||||
"Completed node 30, 22.38727903366089\n",
|
||||
"Completed node 31, 2.507817268371582\n",
|
||||
"Completed node 32, 23.75781512260437\n",
|
||||
"Completed node 33, 3.65451717376709\n",
|
||||
"Completed node 34, 22.2336208820343\n",
|
||||
"Completed node 35, 2.84831166267395\n",
|
||||
"Completed node 36, 23.35297417640686\n",
|
||||
"Completed node 37, 3.027301073074341\n",
|
||||
"Completed node 38, 22.720845937728882\n",
|
||||
"Completed node 39, 2.849353313446045\n",
|
||||
"Completed node 40, 24.094517946243286\n"
|
||||
"Completed node 0, 5.0501158237457275\n",
|
||||
"Completed node 1, 4.125281095504761\n",
|
||||
"Completed node 2, 3.700598955154419\n",
|
||||
"Completed node 3, 4.249290943145752\n",
|
||||
"Completed node 4, 4.552713871002197\n",
|
||||
"Completed node 5, 3.700002908706665\n",
|
||||
"Completed node 6, 4.9324049949646\n",
|
||||
"Completed node 7, 6.246585845947266\n",
|
||||
"Completed node 8, 5.678989887237549\n",
|
||||
"Completed node 9, 4.55932092666626\n",
|
||||
"Completed node 10, 4.865902662277222\n",
|
||||
"Completed node 11, 4.376728057861328\n",
|
||||
"Completed node 12, 3.823659896850586\n",
|
||||
"Completed node 13, 4.069238185882568\n",
|
||||
"Completed node 14, 3.7528319358825684\n",
|
||||
"Completed node 15, 3.789531946182251\n",
|
||||
"Completed node 16, 4.54377818107605\n",
|
||||
"Completed node 17, 3.3560800552368164\n",
|
||||
"Completed node 18, 4.519093990325928\n",
|
||||
"Completed node 19, 5.594789028167725\n",
|
||||
"Completed node 20, 3.7624330520629883\n",
|
||||
"Completed node 21, 3.778661012649536\n",
|
||||
"Completed node 22, 3.895768880844116\n",
|
||||
"Completed node 23, 3.6451258659362793\n",
|
||||
"Completed node 24, 9.422847032546997\n",
|
||||
"Completed node 25, 3.954685926437378\n",
|
||||
"Completed node 26, 3.4985830783843994\n",
|
||||
"Completed node 27, 3.368708848953247\n",
|
||||
"Completed node 28, 3.9136807918548584\n",
|
||||
"Completed node 29, 3.791595935821533\n",
|
||||
"Completed node 30, 3.1155011653900146\n",
|
||||
"Completed node 31, 3.9999842643737793\n",
|
||||
"Completed node 32, 3.654320001602173\n",
|
||||
"Completed node 33, 3.854135036468506\n",
|
||||
"Completed node 34, 3.843966007232666\n",
|
||||
"Completed node 35, 4.019424915313721\n",
|
||||
"Completed node 36, 9.035747766494751\n",
|
||||
"Completed node 37, 5.066689968109131\n",
|
||||
"Completed node 38, 7.529208660125732\n",
|
||||
"Completed node 39, 4.811733961105347\n",
|
||||
"Completed node 40, 2.8257930278778076\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"new_text_nodes = create_contextual_nodes(text_nodes, llm)"
|
||||
"context_llm = Anthropic(model=\"claude-3-5-haiku-latest\")\n",
|
||||
"\n",
|
||||
"new_text_nodes = create_contextual_nodes(text_nodes, context_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -546,25 +511,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from llama_index.core import (\n",
|
||||
" StorageContext,\n",
|
||||
" VectorStoreIndex,\n",
|
||||
" load_index_from_storage,\n",
|
||||
")\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\"storage_nodes_iconiq\"):\n",
|
||||
" index = VectorStoreIndex(new_text_nodes, embed_model=embed_model)\n",
|
||||
" # save index to disk\n",
|
||||
" index.set_index_id(\"vector_index\")\n",
|
||||
" index.storage_context.persist(\"./storage_nodes_iconiq\")\n",
|
||||
"else:\n",
|
||||
" # rebuild storage context\n",
|
||||
" storage_context = StorageContext.from_defaults(persist_dir=\"storage_nodes_iconiq\")\n",
|
||||
" # load index\n",
|
||||
" index = load_index_from_storage(storage_context, index_id=\"vector_index\")\n",
|
||||
"\n",
|
||||
"retriever = index.as_retriever()"
|
||||
"index = VectorStoreIndex(nodes=new_text_nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -584,18 +533,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.exists(\"storage_nodes_iconiq_base\"):\n",
|
||||
" base_index = VectorStoreIndex(text_nodes, embed_model=embed_model)\n",
|
||||
" # save index to disk\n",
|
||||
" base_index.set_index_id(\"vector_index\")\n",
|
||||
" base_index.storage_context.persist(\"./storage_nodes_iconiq_base\")\n",
|
||||
"else:\n",
|
||||
" # rebuild storage context\n",
|
||||
" storage_context = StorageContext.from_defaults(\n",
|
||||
" persist_dir=\"storage_nodes_iconiq_base\"\n",
|
||||
" )\n",
|
||||
" # load index\n",
|
||||
" base_index = load_index_from_storage(storage_context, index_id=\"vector_index\")"
|
||||
"base_index = VectorStoreIndex(text_nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -615,75 +553,76 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.query_engine import CustomQueryEngine, SimpleMultiModalQueryEngine\n",
|
||||
"from llama_index.core.query_engine import CustomQueryEngine\n",
|
||||
"from llama_index.core.retrievers import BaseRetriever\n",
|
||||
"from llama_index.multi_modal_llms.openai import OpenAIMultiModal\n",
|
||||
"from llama_index.core.schema import ImageNode, NodeWithScore, MetadataMode\n",
|
||||
"from llama_index.core.prompts import PromptTemplate\n",
|
||||
"from llama_index.core.schema import MetadataMode\n",
|
||||
"from llama_index.core.base.response.schema import Response\n",
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"gpt_4o = OpenAIMultiModal(model=\"gpt-4o\", max_new_tokens=4096)\n",
|
||||
"\n",
|
||||
"QA_PROMPT_TMPL = \"\"\"\\\n",
|
||||
"qa_prompt_block_text = \"\"\"\\\n",
|
||||
"Below we give parsed text from slides in two different formats, as well as the image.\n",
|
||||
"\n",
|
||||
"---------------------\n",
|
||||
"{context_str}\n",
|
||||
"---------------------\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"image_prefix_block = TextBlock(text=\"And here are the corresponding images per page\\n\")\n",
|
||||
"\n",
|
||||
"image_suffix = \"\"\"\\\n",
|
||||
"Given the context information and not prior knowledge, answer the query. Explain whether you got the answer\n",
|
||||
"from the parsed markdown or raw text or image, and if there's discrepancies, and your reasoning for the final answer.\n",
|
||||
"\n",
|
||||
"Query: {query_str}\n",
|
||||
"Answer: \"\"\"\n",
|
||||
"\n",
|
||||
"QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class MultimodalQueryEngine(CustomQueryEngine):\n",
|
||||
" \"\"\"Custom multimodal Query Engine.\n",
|
||||
"\n",
|
||||
" Takes in a retriever to retrieve a set of document nodes.\n",
|
||||
" Also takes in a prompt template and multimodal model.\n",
|
||||
" Takes in a retriever to retrieve a set of document nodes and respond using an LLM + retrieved text/images.\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" qa_prompt: PromptTemplate\n",
|
||||
" retriever: BaseRetriever\n",
|
||||
" multi_modal_llm: OpenAIMultiModal\n",
|
||||
" llm: Anthropic\n",
|
||||
"\n",
|
||||
" def __init__(self, qa_prompt: Optional[PromptTemplate] = None, **kwargs) -> None:\n",
|
||||
" def __init__(self, **kwargs) -> None:\n",
|
||||
" \"\"\"Initialize.\"\"\"\n",
|
||||
" super().__init__(qa_prompt=qa_prompt or QA_PROMPT, **kwargs)\n",
|
||||
" super().__init__(**kwargs)\n",
|
||||
"\n",
|
||||
" def custom_query(self, query_str: str):\n",
|
||||
" # retrieve text nodes\n",
|
||||
" nodes = self.retriever.retrieve(query_str)\n",
|
||||
" # create ImageNode items from text nodes\n",
|
||||
" image_nodes = [\n",
|
||||
" NodeWithScore(node=ImageNode(image_path=n.metadata[\"image_path\"]))\n",
|
||||
" image_blocks = [\n",
|
||||
" ImageBlock(path=n.metadata[\"image_path\"])\n",
|
||||
" for n in nodes\n",
|
||||
" if n.metadata.get(\"image_path\")\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" # create context string from text nodes, dump into the prompt\n",
|
||||
" context_str = \"\\n\\n\".join(\n",
|
||||
" [r.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]\n",
|
||||
" )\n",
|
||||
" fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)\n",
|
||||
"\n",
|
||||
" formatted_msg = ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" blocks=[\n",
|
||||
" TextBlock(text=qa_prompt_block_text.format(context_str=context_str)),\n",
|
||||
" image_prefix_block,\n",
|
||||
" *image_blocks,\n",
|
||||
" TextBlock(text=image_suffix.format(query_str=query_str)),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # synthesize an answer from formatted text and images\n",
|
||||
" llm_response = self.multi_modal_llm.complete(\n",
|
||||
" prompt=fmt_prompt,\n",
|
||||
" image_documents=[image_node.node for image_node in image_nodes],\n",
|
||||
" )\n",
|
||||
" return Response(\n",
|
||||
" response=str(llm_response),\n",
|
||||
" source_nodes=nodes,\n",
|
||||
" metadata={\"text_nodes\": nodes, \"image_nodes\": image_nodes},\n",
|
||||
" )\n",
|
||||
" llm_response = self.llm.chat([formatted_msg])\n",
|
||||
"\n",
|
||||
" return response"
|
||||
" return Response(\n",
|
||||
" response=str(llm_response.message.content),\n",
|
||||
" source_nodes=nodes,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -694,11 +633,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_engine = MultimodalQueryEngine(\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=3), multi_modal_llm=gpt_4o\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=3),\n",
|
||||
" llm=Anthropic(model=\"claude-4-sonnet-20250514\"),\n",
|
||||
")\n",
|
||||
"base_query_engine = MultimodalQueryEngine(\n",
|
||||
" retriever=base_index.as_retriever(similarity_top_k=3), multi_modal_llm=gpt_4o\n",
|
||||
")"
|
||||
"\n",
|
||||
"base_query_engine = base_index.as_query_engine(similarity_top_k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -716,23 +655,7 @@
|
||||
"execution_count": null,
|
||||
"id": "0fd1aae3-1f8a-4797-a24a-17e563a7165e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The departments that use generative AI the most are:\n",
|
||||
"\n",
|
||||
"1. **AI, Machine Learning, and Data Science**: With a score of 4.5, this department leads in generative AI usage. They likely use AI for advanced data analysis, model development, and improving AI algorithms.\n",
|
||||
"\n",
|
||||
"2. **IT**: Scoring 4.0, IT teams use generative AI for ticket management, chatbots, customer support, troubleshooting, and knowledge management.\n",
|
||||
"\n",
|
||||
"3. **Engineering / R&D**: With a score of 3.9, they use AI to improve coding velocity, refactor code, augment test cases, summarize business requirements, accelerate code reviews, conduct user research, and prototype.\n",
|
||||
"\n",
|
||||
"These insights are derived from the parsed markdown text, which provides detailed scores and use cases for each department. The image confirms this information, showing the same scores and use cases. There are no discrepancies between the parsed text and the image.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = query_engine.query(\n",
|
||||
" \"which departments/teams use genAI the most and how are they using it?\"\n",
|
||||
@@ -745,27 +668,7 @@
|
||||
"execution_count": null,
|
||||
"id": "c9cc48ee-481b-40b1-91b3-c69220e9dfb0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Based on the parsed text from the slides:\n",
|
||||
"\n",
|
||||
"1. **Departments/Teams Using GenAI the Most:**\n",
|
||||
" - **AI, Machine Learning, and Data Science**: Highest usage with a score of 4.5.\n",
|
||||
" - **IT**: Score of 4.0.\n",
|
||||
" - **Engineering/R&D**: Score of 3.9.\n",
|
||||
"\n",
|
||||
"2. **How They Are Using GenAI:**\n",
|
||||
" - **AI, Machine Learning, and Data Science**: Likely using GenAI for advanced analytics and model development.\n",
|
||||
" - **IT**: Utilizes GenAI for internal productivity, IT operations, and software code development.\n",
|
||||
" - **Engineering/R&D**: Uses GenAI for improving coding velocity, code refactoring, augmenting test cases, and accelerating code reviews.\n",
|
||||
"\n",
|
||||
"The information was derived from the parsed markdown text. There are no discrepancies between the parsed text and the images provided. The parsed text clearly outlines the departments with the highest GenAI usage and their specific applications.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_response = base_query_engine.query(\n",
|
||||
" \"which departments/teams use genAI the most and how are they using it?\"\n",
|
||||
@@ -773,84 +676,6 @@
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7b906cb8-07ba-4a8c-9ff8-5162869ad408",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**NOTE**: the relevant page numbers are 32-38. The response with contextual retrieval retrieves the slide detailing IT use cases, hence giving a more detailed response on the IT side."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b7a8c5f-39fc-4d04-8c56-3642f5718437",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"32,33,34\n",
|
||||
"32,21,33\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_source_page_nums(response)\n",
|
||||
"get_source_page_nums(base_response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85a2e748-cc40-4b9f-9401-2ea912839502",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_num: 32\n",
|
||||
"image_path: data_images_iconiq/11f19cc3-c02e-4271-a84f-9a043457fd69-page_32.jpg\n",
|
||||
"parsed_text_markdown: # AI Usage by Function\n",
|
||||
"\n",
|
||||
"Technical teams lead in adoption of generative AI for internal productivity, while HR and legal functions lag, likely hindered by data privacy and quality concerns\n",
|
||||
"\n",
|
||||
"For each department / function in your company, please indicate their level of generative AI usage on a scale of 1-5.\n",
|
||||
"Weighted Average Score by % of Respondents (N = 143)\n",
|
||||
"\n",
|
||||
"| Department/Function | Score |\n",
|
||||
"|---------------------|-------|\n",
|
||||
"| AI, Machine Learning, and Data Science | 4.5 |\n",
|
||||
"| IT | 4.0 |\n",
|
||||
"| Engineering / R&D | 3.9 |\n",
|
||||
"| Product Development & Management | 3.5 |\n",
|
||||
"| Marketing | 3.4 |\n",
|
||||
"| Operations | 3.3 |\n",
|
||||
"| Strategy and Competitive Intelligence | 3.3 |\n",
|
||||
"| Sales | 3.2 |\n",
|
||||
"| Finance | 3.0 |\n",
|
||||
"| Administration | 2.9 |\n",
|
||||
"| Human Resources | 2.7 |\n",
|
||||
"| Legal | 2.7 |\n",
|
||||
"\n",
|
||||
"> We are creating a sense of artificial FOMO among our workforce to encourage participation in pilot groups that will have early access to new GenAI tools\n",
|
||||
"> \n",
|
||||
"> Chief Information Officer, Technology Company\n",
|
||||
"\n",
|
||||
"Source: Perspectives from the ICONIQ Growth GenAI Survey (June 2024) and perspectives from the ICONIQ Growth team and network of AI leaders consisting of our community of CIO/CDOs overseeing AI initiatives in enterprises, CTOs, our Technical Advisory Board, and others in our network\n",
|
||||
"\n",
|
||||
"Private & Strictly Confidential\n",
|
||||
"context: assistant: This chunk is part of the \"Deep Dive on Applications\" section of the report, providing data on AI adoption across different business functions. It shows which departments are leading in generative AI usage, with technical teams at the forefront and HR/legal lagging behind.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# look at an example retrieved source node\n",
|
||||
"print(response.source_nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9462a82-960a-4c42-bbca-a1e71c2c1e5c",
|
||||
@@ -864,26 +689,7 @@
|
||||
"execution_count": null,
|
||||
"id": "e8a0c8b1-3a3e-41c1-9916-01fdfb0dd8e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The \"Deep Dive on Infrastructure\" section provides insights into deployment environments and infrastructure tooling for generative AI models:\n",
|
||||
"\n",
|
||||
"1. **Deployment Environments**:\n",
|
||||
" - Enterprises primarily use cloud or hybrid approaches for hosting generative AI workloads.\n",
|
||||
" - 56% of respondents prefer cloud deployment, while 42% use a hybrid method.\n",
|
||||
" - AWS (68%) and Azure (61%) are the most utilized cloud service providers, with Google Cloud at 40%.\n",
|
||||
"\n",
|
||||
"2. **Infrastructure Tooling**:\n",
|
||||
" - Enterprises are investing in infrastructure tools for data observability, database augmentation, and data pre-processing.\n",
|
||||
" - Key areas for infrastructure tooling include observability, evaluation, and security (50%), databases (48%), and data pre-processing (47%).\n",
|
||||
"\n",
|
||||
"These insights were derived from the parsed markdown text, which provides detailed information on deployment preferences and infrastructure investments. There are no discrepancies between the parsed text and the images provided.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"what are relevant insights from the 'deep dive on infrastructure' section in terms of model preferences, cost, deployment environments?\"\n",
|
||||
"\n",
|
||||
@@ -896,101 +702,18 @@
|
||||
"execution_count": null,
|
||||
"id": "0f1638c6-ca29-462b-a21f-a2941968259c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The \"Deep Dive on Infrastructure\" section does not provide specific insights on model preferences, cost, or deployment environments based on the parsed text. The slide titled \"Deep Dive on Infrastructure\" only contains the title and copyright information, without any detailed content or data.\n",
|
||||
"\n",
|
||||
"This conclusion is drawn from the parsed markdown text, which lacks any specific information on model preferences, cost, or deployment environments in that section. The image confirms this, as it only shows the title and a graphic without additional details.\n",
|
||||
"\n",
|
||||
"If you need insights on these topics, you might want to refer to other sections or slides that specifically address model preferences, costs, or deployment environments.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_response = base_query_engine.query(query)\n",
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d6eb745-b3d3-4e37-bb2d-d2d649d77d01",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"24,30,26\n",
|
||||
"30,17,24\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_source_page_nums(response)\n",
|
||||
"get_source_page_nums(base_response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bc741ad9-47da-47e7-b1b2-540d686c0bf4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_num: 26\n",
|
||||
"image_path: data_images_iconiq/11f19cc3-c02e-4271-a84f-9a043457fd69-page_26.jpg\n",
|
||||
"parsed_text_markdown: # Cloud Deployment Method\n",
|
||||
"\n",
|
||||
"Enterprises are primarily hosting generative AI workloads on the cloud or via a hybrid approach; AWS and Azure are the most utilized cloud service providers\n",
|
||||
"\n",
|
||||
"## Preferred Deployment Method for GenAI Models\n",
|
||||
"% of Respondents (N = 126)\n",
|
||||
"\n",
|
||||
"| Method | Percentage |\n",
|
||||
"|----------|------------|\n",
|
||||
"| On-prem | 2% |\n",
|
||||
"| Hybrid | 42% |\n",
|
||||
"| Cloud | 56% |\n",
|
||||
"\n",
|
||||
"## CSP Used for GenAI Products\n",
|
||||
"Multi-Select, % of Respondents (N = 218)\n",
|
||||
"\n",
|
||||
"| Cloud Service Provider | Percentage |\n",
|
||||
"|----------------------------|------------|\n",
|
||||
"| Amazon Web Services (AWS) | 68% |\n",
|
||||
"| Microsoft Azure | 61% |\n",
|
||||
"| Google Cloud (GCP) | 40% |\n",
|
||||
"| Other | 3% |\n",
|
||||
"\n",
|
||||
"While Azure has captured mindshare with its OpenAI, Amazon remains ahead in terms of cloud usage given the dominant market share AWS has in cloud¹\n",
|
||||
"\n",
|
||||
"Notes: (1) Statista Worldwide Market Share of Leading Cloud Infrastructure Service Providers (May 2024)\n",
|
||||
"\n",
|
||||
"Source: Perspectives from the ICONIQ Growth GenAI Survey (June 2024) and perspectives from the ICONIQ Growth team and network of AI leaders consisting of our community of CIO/CDOs overseeing AI initiatives in enterprises, CTOs, our Technical Advisory Board, and others in our network\n",
|
||||
"\n",
|
||||
"Private & Strictly Confidential\n",
|
||||
"context: assistant: This chunk is part of the \"Deep Dive on Infrastructure\" section of the report, discussing cloud deployment methods and cloud service providers used for generative AI workloads by enterprises. It follows sections on key purchasing criteria for AI models and precedes information on proprietary vs open source models.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# look at an example retrieved source node\n",
|
||||
"print(response.source_nodes[2].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -19,6 +19,11 @@
|
||||
"- **Robustness**: This solution is more robust than a pure text or even a pure image-based approach. In a pure text RAG approach, the parsing piece can be lossy. In a pure image-based approach, multimodal OCR is not perfect and may lose out against text parsing for text-heavy documents.\n",
|
||||
"- **Cost Optimization**: You may choose to dynamically include text-only, or text + image depending on the content of the page.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-20-2025 | 0.6.61 | Maintained |\n",
|
||||
"\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
@@ -33,13 +38,24 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70ccdd53-e68a-4199-aacb-cfe71ad1ff0b",
|
||||
"id": "73542086",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"%pip install llama-cloud-services \"llama-index>=0.13.0<0.14.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4518afd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,7 +63,7 @@
|
||||
"id": "225c5556-a789-4386-a1ee-cce01dbeb6cf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup Observability\n",
|
||||
"### (Optional) Setup Observability\n",
|
||||
"\n",
|
||||
"We setup an integration with LlamaTrace (integration with Arize).\n",
|
||||
"\n",
|
||||
@@ -126,7 +142,7 @@
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")\n",
|
||||
"llm = OpenAI(model=\"gpt-4o\")\n",
|
||||
"llm = OpenAI(model=\"gpt-5-mini\")\n",
|
||||
"\n",
|
||||
"Settings.embed_model = embed_model\n",
|
||||
"Settings.llm = llm"
|
||||
@@ -139,11 +155,7 @@
|
||||
"source": [
|
||||
"## Use LlamaParse to Parse Text and Images\n",
|
||||
"\n",
|
||||
"In this example, use LlamaParse to parse both the text and images from the document.\n",
|
||||
"\n",
|
||||
"We parse out the text in two ways: \n",
|
||||
"- in regular `text` mode using our default text layout algorithm\n",
|
||||
"- in `markdown` mode using GPT-4o (`gpt4o_mode=True`). This also allows us to capture page screenshots"
|
||||
"In this example, use LlamaParse to parse both the text and images from the document."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -156,8 +168,14 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"parser_text = LlamaParse(result_type=\"text\")\n",
|
||||
"parser_gpt4o = LlamaParse(result_type=\"markdown\", gpt4o_mode=True)"
|
||||
"parser = LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" adaptive_long_table=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,19 +188,13 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Parsing text...\n",
|
||||
"Started parsing the file under job_id 62f157a9-9ef9-4e5b-95ac-67093fa25800\n",
|
||||
"..........Parsing PDF file...\n",
|
||||
"Started parsing the file under job_id 1ddd5654-062b-4e19-b488-d66efc9c509d\n"
|
||||
"Started parsing the file under job_id 2cf07879-5bdb-4dca-9a07-001b2a07727e\n",
|
||||
"."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Parsing text...\")\n",
|
||||
"docs_text = parser_text.load_data(\"data/conocophillips.pdf\")\n",
|
||||
"print(f\"Parsing PDF file...\")\n",
|
||||
"md_json_objs = parser_gpt4o.get_json_result(\"data/conocophillips.pdf\")\n",
|
||||
"md_json_list = md_json_objs[0][\"pages\"]"
|
||||
"results = await parser.aparse(\"data/conocophillips.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,36 +207,123 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"# Commitment to Disciplined Reinvestment Rate\n",
|
||||
"\n",
|
||||
"| Period | Description | Reinvestment Rate | WTI Average |\n",
|
||||
"|--------------|--------------------------------------|-------------------|-------------|\n",
|
||||
"| 2012-2016 | Industry Growth Focus | >100% | ~$75/BBL |\n",
|
||||
"| 2017-2022 | ConocoPhillips Strategy Reset | <60% | ~$63/BBL |\n",
|
||||
"| 2023E | | | at $80/BBL |\n",
|
||||
"| 2024-2028 | Disciplined Reinvestment Rate | ~50% | at $60/BBL |\n",
|
||||
"| 2029-2032 | | ~6% CFO CAGR | at $60/BBL |\n",
|
||||
"<table>\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
" <th>Industry Growth Focus</th>\n",
|
||||
" <th>ConocoPhillips Strategy Reset</th>\n",
|
||||
" <th>Disciplined Reinvestment Rate is the Foundation for Superior Returns <br> <b>on and of</b> Capital, while Driving Durable CFO Growth</th>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
" <td style=\"text-align:center;\">>100%<br>Reinvestment Rate</td>\n",
|
||||
" <td style=\"text-align:center;\"><60%<br>Reinvestment Rate</td>\n",
|
||||
" <td style=\"text-align:center; font-weight:bold; color:#0055ff;\">\n",
|
||||
" ~50%<br>10-Year Reinvestment Rate<br><br>\n",
|
||||
" ~6%<br>CFO CAGR 2024-2032<br><br>\n",
|
||||
" at $60/BBL WTI<br>Mid-Cycle Planning Price\n",
|
||||
" </td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <td>\n",
|
||||
" <div style=\"height:150px; width:50px; background-color:#b0b0b0; margin: 0 auto; position:relative;\">\n",
|
||||
" <div style=\"position:absolute; bottom:0; width:100%; height:105%; background-color:#b0b0b0;\"></div>\n",
|
||||
" <div style=\"position:absolute; bottom:0; width:100%; text-align:center; color:#fff; font-weight:bold;\">~$75/BBL<br>WTI Average</div>\n",
|
||||
" </div>\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <div style=\"height:150px; width:50px; background-color:#b0b0b0; margin: 0 auto; position:relative;\">\n",
|
||||
" <div style=\"position:absolute; bottom:0; width:100%; height:56%; background-color:#b0b0b0;\"></div>\n",
|
||||
" <div style=\"position:absolute; bottom:0; width:100%; text-align:center; color:#fff; font-weight:bold;\">~$63/BBL<br>WTI Average</div>\n",
|
||||
" </div>\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"- **Historic Reinvestment Rate**: Gray bars\n",
|
||||
"- **Reinvestment Rate at $60/BBL WTI**: Blue bars\n",
|
||||
"- **Reinvestment Rate at $80/BBL WTI**: Dashed blue lines\n",
|
||||
"<table>\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th>Year</th>\n",
|
||||
" <th>Reinvestment Rate at $60/BBL WTI</th>\n",
|
||||
" <th>Reinvestment Rate at $80/BBL WTI</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <td>2023E</td>\n",
|
||||
" <td style=\"background-color:#3399ff; color:#fff; text-align:center;\">~50%</td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
"<tr>\n",
|
||||
" <td>2024-2028</td>\n",
|
||||
" <td style=\"background-color:#0033cc; color:#fff; text-align:center;\">~55%</td>\n",
|
||||
" <td style=\"border-top: 2px dashed #3399ff; text-align:center;\">at $80/BBL WTI</td>\n",
|
||||
" </tr>\n",
|
||||
"<tr>\n",
|
||||
" <td>2029-2032</td>\n",
|
||||
" <td style=\"background-color:#0033cc; color:#fff; text-align:center;\">~38%</td>\n",
|
||||
" <td style=\"border-top: 2px dashed #3399ff; text-align:center;\">at $80/BBL WTI</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
" </table>\n",
|
||||
"\n",
|
||||
"Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n"
|
||||
" </td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <td colspan=\"3\" style=\"text-align:center; font-size:0.8em; color:#666;\">\n",
|
||||
" Historic Reinvestment Rate (gray) | Reinvestment Rate at $60/BBL WTI (blue solid) | Reinvestment Rate at $80/BBL WTI (blue dashed)\n",
|
||||
" </td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <td colspan=\"3\" style=\"font-size:0.75em; color:#999; padding-top:10px;\">\n",
|
||||
" Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n",
|
||||
" </td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(md_json_list[10][\"md\"])"
|
||||
"print(results.pages[10].md)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb5ec429",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can download the page screenshots directly, and we can use them as context later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eeadb16c-97eb-4622-9551-b34d7f90d72f",
|
||||
"id": "27773ef0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_dicts = parser_gpt4o.get_images(md_json_objs, download_path=\"data_images\")"
|
||||
"image_nodes = await results.aget_image_nodes(\n",
|
||||
" include_object_images=False,\n",
|
||||
" include_screenshot_images=True,\n",
|
||||
" image_download_dir=\"./slide_images\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0ea7a69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_nodes = results.get_markdown_nodes(split_by_page=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -256,8 +355,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"from typing import Optional"
|
||||
"for text_node, image_node in zip(text_nodes, image_nodes):\n",
|
||||
" text_node.metadata[\"image_path\"] = image_node.image_path"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -265,124 +364,24 @@
|
||||
"execution_count": null,
|
||||
"id": "8e331dfe-a627-4e23-8c57-70ab1d9342e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get pages loaded through llamaparse\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_page_number(file_name):\n",
|
||||
" match = re.search(r\"-page-(\\d+)\\.jpg$\", str(file_name))\n",
|
||||
" if match:\n",
|
||||
" return int(match.group(1))\n",
|
||||
" return 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_sorted_image_files(image_dir):\n",
|
||||
" \"\"\"Get image files sorted by page.\"\"\"\n",
|
||||
" raw_files = [f for f in list(Path(image_dir).iterdir()) if f.is_file()]\n",
|
||||
" sorted_files = sorted(raw_files, key=get_page_number)\n",
|
||||
" return sorted_files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "346fe5ef-171e-4a54-9084-7a7805103a13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from copy import deepcopy\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# attach image metadata to the text nodes\n",
|
||||
"def get_text_nodes(docs, image_dir=None, json_dicts=None):\n",
|
||||
" \"\"\"Split docs into nodes, by separator.\"\"\"\n",
|
||||
" nodes = []\n",
|
||||
"\n",
|
||||
" image_files = _get_sorted_image_files(image_dir) if image_dir is not None else None\n",
|
||||
" md_texts = [d[\"md\"] for d in json_dicts] if json_dicts is not None else None\n",
|
||||
"\n",
|
||||
" doc_chunks = [c for d in docs for c in d.text.split(\"---\")]\n",
|
||||
" for idx, doc_chunk in enumerate(doc_chunks):\n",
|
||||
" chunk_metadata = {\"page_num\": idx + 1}\n",
|
||||
" if image_files is not None:\n",
|
||||
" image_file = image_files[idx]\n",
|
||||
" chunk_metadata[\"image_path\"] = str(image_file)\n",
|
||||
" if md_texts is not None:\n",
|
||||
" chunk_metadata[\"parsed_text_markdown\"] = md_texts[idx]\n",
|
||||
" chunk_metadata[\"parsed_text\"] = doc_chunk\n",
|
||||
" node = TextNode(\n",
|
||||
" text=\"\",\n",
|
||||
" metadata=chunk_metadata,\n",
|
||||
" )\n",
|
||||
" nodes.append(node)\n",
|
||||
"\n",
|
||||
" return nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f591669c-5a8e-491d-9cef-0b754abbf26f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# this will split into pages\n",
|
||||
"text_nodes = get_text_nodes(docs_text, image_dir=\"data_images\", json_dicts=md_json_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "32c13950-c1db-435f-b5b4-89d62b8b7744",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_num: 11\n",
|
||||
"image_path: data_images/1ddd5654-062b-4e19-b488-d66efc9c509d-page_39.jpg\n",
|
||||
"parsed_text_markdown: # Commitment to Disciplined Reinvestment Rate\n",
|
||||
"page_number: 1\n",
|
||||
"file_name: data/conocophillips.pdf\n",
|
||||
"image_path: slide_images/page_1.jpg\n",
|
||||
"\n",
|
||||
"| Period | Description | Reinvestment Rate | WTI Average |\n",
|
||||
"|--------------|--------------------------------------|-------------------|-------------|\n",
|
||||
"| 2012-2016 | Industry Growth Focus | >100% | ~$75/BBL |\n",
|
||||
"| 2017-2022 | ConocoPhillips Strategy Reset | <60% | ~$63/BBL |\n",
|
||||
"| 2023E | | | at $80/BBL |\n",
|
||||
"| 2024-2028 | Disciplined Reinvestment Rate | ~50% | at $60/BBL |\n",
|
||||
"| 2029-2032 | | ~6% CFO CAGR | at $60/BBL |\n",
|
||||
"\n",
|
||||
"- **Historic Reinvestment Rate**: Gray bars\n",
|
||||
"- **Reinvestment Rate at $60/BBL WTI**: Blue bars\n",
|
||||
"- **Reinvestment Rate at $80/BBL WTI**: Dashed blue lines\n",
|
||||
"# ConocoPhillips\n",
|
||||
"\n",
|
||||
"Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n",
|
||||
"parsed_text: Commitment to Disciplined Reinvestment Rate\n",
|
||||
" Industry ConocoPhillips\n",
|
||||
" Strategy Reset Disciplined Reinvestment Rate is the Foundation for Superior\n",
|
||||
" Growth Focus Returns on and of Capital, while Driving Durable CFO Growth\n",
|
||||
" 100% <60% 50% 6% at $60/BBL WTI\n",
|
||||
" Reinvestment Rate Reinvestment Rate Reinvestment Rate10-YearCFO CAGR Planning PriceMid-Cycle\n",
|
||||
" 2024-2032\n",
|
||||
" 2 100%\n",
|
||||
" 1 75%\n",
|
||||
" 1 50%\n",
|
||||
" 1 WTIat $80/BBL at S80/BBL\n",
|
||||
" 25% 'S75/BBL $63/BBL WTI\n",
|
||||
" WTI WTI at S80/BBL at S60/BBL at S60/BBL\n",
|
||||
" Average Average WTI WTI WTI\n",
|
||||
" 0%\n",
|
||||
" 2012-2016 2017-2022 2023E 2024-2028 2029-2032\n",
|
||||
" Historic Reinvestment Rate Reinvestment Rate at $60/BBL WTI Reinvestment Rate at $80/BBL WTI\n",
|
||||
" Reinvestment rate and cash from operations (CFO) are non-GAAP measures: Definitions and reconciliations are included in the Appendix ConocoPhillips\n"
|
||||
"## 2023 Analyst & Investor Meeting\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(text_nodes[10].get_content(metadata_mode=\"all\"))"
|
||||
"print(text_nodes[0].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -400,37 +399,11 @@
|
||||
"execution_count": null,
|
||||
"id": "6ea53c31-0e38-421c-8d9b-0e3adaa1677e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/tiktoken/core.py:50: RuntimeWarning: coroutine 'LlamaParse.aload_data' was never awaited\n",
|
||||
" self._core_bpe = _tiktoken.CoreBPE(mergeable_ranks, special_tokens, pat_str)\n",
|
||||
"RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from llama_index.core import (\n",
|
||||
" StorageContext,\n",
|
||||
" VectorStoreIndex,\n",
|
||||
" load_index_from_storage,\n",
|
||||
")\n",
|
||||
"from llama_index.core import VectorStoreIndex\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\"storage_nodes\"):\n",
|
||||
" index = VectorStoreIndex(text_nodes, embed_model=embed_model)\n",
|
||||
" # save index to disk\n",
|
||||
" index.set_index_id(\"vector_index\")\n",
|
||||
" index.storage_context.persist(\"./storage_nodes\")\n",
|
||||
"else:\n",
|
||||
" # rebuild storage context\n",
|
||||
" storage_context = StorageContext.from_defaults(persist_dir=\"storage_nodes\")\n",
|
||||
" # load index\n",
|
||||
" index = load_index_from_storage(storage_context, index_id=\"vector_index\")\n",
|
||||
"\n",
|
||||
"retriever = index.as_retriever()"
|
||||
"index = VectorStoreIndex(nodes=text_nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -450,82 +423,77 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.query_engine import CustomQueryEngine, SimpleMultiModalQueryEngine\n",
|
||||
"from llama_index.core.query_engine import CustomQueryEngine\n",
|
||||
"from llama_index.core.retrievers import BaseRetriever\n",
|
||||
"from llama_index.multi_modal_llms.openai import OpenAIMultiModal\n",
|
||||
"from llama_index.core.schema import ImageNode, NodeWithScore, MetadataMode\n",
|
||||
"from llama_index.core.prompts import PromptTemplate\n",
|
||||
"from llama_index.core.schema import MetadataMode\n",
|
||||
"from llama_index.core.base.response.schema import Response\n",
|
||||
"from typing import Optional\n",
|
||||
"from llama_index.core.llms import TextBlock, ImageBlock, ChatMessage\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"gpt_4o = OpenAIMultiModal(model=\"gpt-4o\", max_new_tokens=4096)\n",
|
||||
"\n",
|
||||
"QA_PROMPT_TMPL = \"\"\"\\\n",
|
||||
"qa_prompt_block_text = \"\"\"\\\n",
|
||||
"Below we give parsed text from slides in two different formats, as well as the image.\n",
|
||||
"\n",
|
||||
"We parse the text in both 'markdown' mode as well as 'raw text' mode. Markdown mode attempts \\\n",
|
||||
"to convert relevant diagrams into tables, whereas raw text tries to maintain the rough spatial \\\n",
|
||||
"layout of the text.\n",
|
||||
"\n",
|
||||
"Use the image information first and foremost. ONLY use the text/markdown information \n",
|
||||
"if you can't understand the image.\n",
|
||||
"\n",
|
||||
"---------------------\n",
|
||||
"{context_str}\n",
|
||||
"---------------------\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"image_prefix_block = TextBlock(text=\"And here are the corresponding images per page\\n\")\n",
|
||||
"\n",
|
||||
"image_suffix = \"\"\"\\\n",
|
||||
"Given the context information and not prior knowledge, answer the query. Explain whether you got the answer\n",
|
||||
"from the parsed markdown or raw text or image, and if there's discrepancies, and your reasoning for the final answer.\n",
|
||||
"\n",
|
||||
"Query: {query_str}\n",
|
||||
"Answer: \"\"\"\n",
|
||||
"\n",
|
||||
"QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class MultimodalQueryEngine(CustomQueryEngine):\n",
|
||||
" \"\"\"Custom multimodal Query Engine.\n",
|
||||
"\n",
|
||||
" Takes in a retriever to retrieve a set of document nodes.\n",
|
||||
" Also takes in a prompt template and multimodal model.\n",
|
||||
" Takes in a retriever to retrieve a set of document nodes and respond using an LLM + retrieved text/images.\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" qa_prompt: PromptTemplate\n",
|
||||
" retriever: BaseRetriever\n",
|
||||
" multi_modal_llm: OpenAIMultiModal\n",
|
||||
" llm: OpenAI\n",
|
||||
"\n",
|
||||
" def __init__(self, qa_prompt: Optional[PromptTemplate] = None, **kwargs) -> None:\n",
|
||||
" def __init__(self, **kwargs) -> None:\n",
|
||||
" \"\"\"Initialize.\"\"\"\n",
|
||||
" super().__init__(qa_prompt=qa_prompt or QA_PROMPT, **kwargs)\n",
|
||||
" super().__init__(**kwargs)\n",
|
||||
"\n",
|
||||
" def custom_query(self, query_str: str):\n",
|
||||
" # retrieve text nodes\n",
|
||||
" nodes = self.retriever.retrieve(query_str)\n",
|
||||
" # create ImageNode items from text nodes\n",
|
||||
" image_nodes = [\n",
|
||||
" NodeWithScore(node=ImageNode(image_path=n.metadata[\"image_path\"]))\n",
|
||||
" image_blocks = [\n",
|
||||
" ImageBlock(path=n.metadata[\"image_path\"])\n",
|
||||
" for n in nodes\n",
|
||||
" if n.metadata.get(\"image_path\")\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" # create context string from text nodes, dump into the prompt\n",
|
||||
" context_str = \"\\n\\n\".join(\n",
|
||||
" [r.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]\n",
|
||||
" )\n",
|
||||
" fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)\n",
|
||||
"\n",
|
||||
" formatted_msg = ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" blocks=[\n",
|
||||
" TextBlock(text=qa_prompt_block_text.format(context_str=context_str)),\n",
|
||||
" image_prefix_block,\n",
|
||||
" *image_blocks,\n",
|
||||
" TextBlock(text=image_suffix.format(query_str=query_str)),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # synthesize an answer from formatted text and images\n",
|
||||
" llm_response = self.multi_modal_llm.complete(\n",
|
||||
" prompt=fmt_prompt,\n",
|
||||
" image_documents=[image_node.node for image_node in image_nodes],\n",
|
||||
" )\n",
|
||||
" return Response(\n",
|
||||
" response=str(llm_response),\n",
|
||||
" source_nodes=nodes,\n",
|
||||
" metadata={\"text_nodes\": text_nodes, \"image_nodes\": image_nodes},\n",
|
||||
" )\n",
|
||||
" llm_response = self.llm.chat([formatted_msg])\n",
|
||||
"\n",
|
||||
" return response"
|
||||
" return Response(\n",
|
||||
" response=str(llm_response.message.content),\n",
|
||||
" source_nodes=nodes,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -536,7 +504,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_engine = MultimodalQueryEngine(\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=9), multi_modal_llm=gpt_4o\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=3), llm=llm\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -547,80 +515,7 @@
|
||||
"source": [
|
||||
"### Define Baseline\n",
|
||||
"\n",
|
||||
"In addition, we define a \"baseline\" where we rely only on text-based indexing. Here we define an index using only the nodes that are parsed in text-mode from LlamaParse. \n",
|
||||
"\n",
|
||||
"**NOTE**: We don't currently include the markdown-parsed text because that was parsed with GPT-4o, so already uses a multimodal model during the text extraction phase.\n",
|
||||
"\n",
|
||||
"It is of course a valid experiment to compare RAG where multimodal extraction only happens during indexing, vs. the current multimodal RAG implementation where images are fed during synthesis to the LLM. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0b15a48-d177-4666-aec2-98ee90664642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_nodes(docs):\n",
|
||||
" \"\"\"Split docs into nodes, by separator.\"\"\"\n",
|
||||
" nodes = []\n",
|
||||
" for doc in docs:\n",
|
||||
" doc_chunks = doc.text.split(\"\\n---\\n\")\n",
|
||||
" for doc_chunk in doc_chunks:\n",
|
||||
" node = TextNode(\n",
|
||||
" text=doc_chunk,\n",
|
||||
" metadata=deepcopy(doc.metadata),\n",
|
||||
" )\n",
|
||||
" nodes.append(node)\n",
|
||||
"\n",
|
||||
" return nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2065d2c6-d6ba-4ee3-8e9e-dbc83cbcec1b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_nodes = get_nodes(docs_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bcaea1a8-26c9-4385-8f62-32855aa898b6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Our Differentiated Portfolio: Deep; Durable and Diverse\n",
|
||||
" 20 BBOE of Resource Diverse Production Base\n",
|
||||
" Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n",
|
||||
" S50 S32/BBL Lower 48 Alaska\n",
|
||||
" Average Cost of Supply\n",
|
||||
" 3 $40 GKA GWA\n",
|
||||
" GPA WNS\n",
|
||||
" $30 EMENA\n",
|
||||
" 3 Norway\n",
|
||||
" 8 $20\n",
|
||||
" E Qatar Libya\n",
|
||||
" Asia Pacific Canada\n",
|
||||
" $10 Permian\n",
|
||||
" APLNG Montney\n",
|
||||
" S0\n",
|
||||
" 10 15 20 Bakken\n",
|
||||
" Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n",
|
||||
" Lower 48 Canada Alaska EMENA Asia Pacific\n",
|
||||
"Costs assumemid-cycle price environment of S60/BBL WTI:\n",
|
||||
" ConocoPhillips\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(base_nodes[13].get_content(metadata_mode=\"all\"))"
|
||||
"In addition, we define a \"baseline\" where we rely only on text-based indexing. Here we define an index using only the nodes that are parsed in text-mode from LlamaParse. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -630,8 +525,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"base_index = VectorStoreIndex(base_nodes, embed_model=embed_model)\n",
|
||||
"base_query_engine = base_index.as_query_engine(llm=llm, similarity_top_k=9)"
|
||||
"base_index = VectorStoreIndex(nodes=text_nodes)\n",
|
||||
"base_query_engine = base_index.as_query_engine(llm=llm, similarity_top_k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -652,7 +547,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.tools import QueryEngineTool\n",
|
||||
"from llama_index.core.agent import FunctionCallingAgentWorker\n",
|
||||
"from llama_index.core.agent import FunctionAgent\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"vector_tool = QueryEngineTool.from_defaults(\n",
|
||||
@@ -662,9 +557,15 @@
|
||||
" \"Useful for retrieving specific context from the data. Do NOT select if question asks for a summary of the data.\"\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"agent = FunctionCallingAgentWorker.from_tools(\n",
|
||||
" [vector_tool], llm=llm, verbose=True\n",
|
||||
").as_agent()"
|
||||
"agent = FunctionAgent(\n",
|
||||
" tools=[vector_tool],\n",
|
||||
" llm=llm,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"from llama_index.core.workflow import Context\n",
|
||||
"\n",
|
||||
"# Context to store chat history for the session\n",
|
||||
"ctx = Context(agent)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -682,9 +583,12 @@
|
||||
" \"Useful for retrieving specific context from the data. Do NOT select if question asks for a summary of the data.\"\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"base_agent = FunctionCallingAgentWorker.from_tools(\n",
|
||||
" [base_vector_tool], llm=llm, verbose=True\n",
|
||||
").as_agent()"
|
||||
"base_agent = FunctionAgent(\n",
|
||||
" tools=[base_vector_tool],\n",
|
||||
" llm=llm,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"base_ctx = Context(base_agent)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -702,79 +606,14 @@
|
||||
"execution_count": null,
|
||||
"id": "d78e53cf-35cb-4ef8-b03e-1b47ba15ae64",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Added user message to memory: Tell me about the diverse geographies where Conoco Phillips has a production base\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: vector_tool with args: {\"input\": \"Conoco Phillips production base geographies\"}\n",
|
||||
"=== Function Output ===\n",
|
||||
"ConocoPhillips' production base geographies include:\n",
|
||||
"\n",
|
||||
"1. **Lower 48** (Permian, Eagle Ford, Bakken, Other)\n",
|
||||
"2. **Alaska** (GKA, GWA, GPA, WNS)\n",
|
||||
"3. **EMENA** (Norway, Libya, Qatar)\n",
|
||||
"4. **Asia Pacific** (APLNG, Malaysia, China)\n",
|
||||
"5. **Canada** (Montney, Surmont)\n",
|
||||
"\n",
|
||||
"This information was derived from the image on page 14, which provides a detailed breakdown of the diverse production base and the regions involved. The parsed markdown and raw text also support this information, but the image provides the clearest and most comprehensive view. There are no discrepancies between the image and the parsed text in this case.\n",
|
||||
"=== LLM Response ===\n",
|
||||
"ConocoPhillips has a diverse production base spread across various geographies, including:\n",
|
||||
"\n",
|
||||
"1. **Lower 48**:\n",
|
||||
" - Permian Basin\n",
|
||||
" - Eagle Ford\n",
|
||||
" - Bakken\n",
|
||||
" - Other regions within the continental United States\n",
|
||||
"\n",
|
||||
"2. **Alaska**:\n",
|
||||
" - Greater Kuparuk Area (GKA)\n",
|
||||
" - Greater Prudhoe Area (GPA)\n",
|
||||
" - Greater Willow Area (GWA)\n",
|
||||
" - Western North Slope (WNS)\n",
|
||||
"\n",
|
||||
"3. **EMENA (Europe, Middle East, and North Africa)**:\n",
|
||||
" - Norway\n",
|
||||
" - Libya\n",
|
||||
" - Qatar\n",
|
||||
"\n",
|
||||
"4. **Asia Pacific**:\n",
|
||||
" - Australia Pacific LNG (APLNG)\n",
|
||||
" - Malaysia\n",
|
||||
" - China\n",
|
||||
"\n",
|
||||
"5. **Canada**:\n",
|
||||
" - Montney\n",
|
||||
" - Surmont\n",
|
||||
"\n",
|
||||
"These regions highlight the global reach and diverse geographical footprint of ConocoPhillips' production operations.\n",
|
||||
"Added user message to memory: Tell me about the diverse geographies where Conoco Phillips has a production base\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: vector_tool with args: {\"input\": \"diverse geographies where Conoco Phillips has a production base\"}\n",
|
||||
"=== Function Output ===\n",
|
||||
"ConocoPhillips has a diverse production base that includes the Lower 48 (Permian, Bakken, Eagle Ford), Alaska, Canada (Montney, Surmont), EMENA (Norway, Libya), Asia Pacific (Malaysia, China, APLNG), and Qatar.\n",
|
||||
"=== LLM Response ===\n",
|
||||
"ConocoPhillips has a diverse production base spanning several key geographies:\n",
|
||||
"\n",
|
||||
"1. **Lower 48 (United States)**: This includes major production areas such as the Permian Basin, Bakken Formation, and Eagle Ford Shale.\n",
|
||||
"2. **Alaska**: Significant operations in the North Slope region.\n",
|
||||
"3. **Canada**: Operations in the Montney Formation and the Surmont oil sands project.\n",
|
||||
"4. **EMENA (Europe, Middle East, and North Africa)**: Notable operations in Norway and Libya.\n",
|
||||
"5. **Asia Pacific**: Includes operations in Malaysia, China, and the Australia Pacific LNG (APLNG) project.\n",
|
||||
"6. **Qatar**: Involvement in the country's energy sector.\n",
|
||||
"\n",
|
||||
"These regions highlight the company's extensive and varied geographical footprint in the energy production industry.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = (\n",
|
||||
" \"Tell me about the diverse geographies where Conoco Phillips has a production base\"\n",
|
||||
")\n",
|
||||
"response = agent.query(query)\n",
|
||||
"base_response = base_agent.query(query)"
|
||||
"\n",
|
||||
"response = await agent.run(query, ctx=ctx)\n",
|
||||
"base_response = await base_agent.run(query, ctx=base_ctx)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -782,205 +621,27 @@
|
||||
"execution_count": null,
|
||||
"id": "355d2aa4-c26f-480e-b512-4446acbd9227",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ConocoPhillips has a diverse production base spread across various geographies, including:\n",
|
||||
"\n",
|
||||
"1. **Lower 48**:\n",
|
||||
" - Permian Basin\n",
|
||||
" - Eagle Ford\n",
|
||||
" - Bakken\n",
|
||||
" - Other regions within the continental United States\n",
|
||||
"\n",
|
||||
"2. **Alaska**:\n",
|
||||
" - Greater Kuparuk Area (GKA)\n",
|
||||
" - Greater Prudhoe Area (GPA)\n",
|
||||
" - Greater Willow Area (GWA)\n",
|
||||
" - Western North Slope (WNS)\n",
|
||||
"\n",
|
||||
"3. **EMENA (Europe, Middle East, and North Africa)**:\n",
|
||||
" - Norway\n",
|
||||
" - Libya\n",
|
||||
" - Qatar\n",
|
||||
"\n",
|
||||
"4. **Asia Pacific**:\n",
|
||||
" - Australia Pacific LNG (APLNG)\n",
|
||||
" - Malaysia\n",
|
||||
" - China\n",
|
||||
"\n",
|
||||
"5. **Canada**:\n",
|
||||
" - Montney\n",
|
||||
" - Surmont\n",
|
||||
"\n",
|
||||
"These regions highlight the global reach and diverse geographical footprint of ConocoPhillips' production operations.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(str(response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d584c560-8f49-4c10-a4db-2e0d3b7085d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_num: 14\n",
|
||||
"image_path: data_images/1ddd5654-062b-4e19-b488-d66efc9c509d-page_12.jpg\n",
|
||||
"parsed_text_markdown: # Our Differentiated Portfolio: Deep, Durable and Diverse\n",
|
||||
"\n",
|
||||
"## ~20 BBOE of Resource\n",
|
||||
"Under $40/BBL Cost of Supply\n",
|
||||
"\n",
|
||||
"### ~ $32/BBL\n",
|
||||
"Average Cost of Supply\n",
|
||||
"\n",
|
||||
"### WTI Cost of Supply ($/BBL)\n",
|
||||
"\n",
|
||||
"| Cost ($/BBL) | Resource (BBOE) |\n",
|
||||
"|--------------|-----------------|\n",
|
||||
"| $0 | 0 |\n",
|
||||
"| $10 | |\n",
|
||||
"| $20 | |\n",
|
||||
"| $30 | |\n",
|
||||
"| $40 | |\n",
|
||||
"| $50 | |\n",
|
||||
"\n",
|
||||
"- **Legend:**\n",
|
||||
" - Lower 48\n",
|
||||
" - Canada\n",
|
||||
" - Alaska\n",
|
||||
" - EMENA\n",
|
||||
" - Asia Pacific\n",
|
||||
"\n",
|
||||
"*Costs assume a mid-cycle price environment of $60/BBL WTI.*\n",
|
||||
"\n",
|
||||
"## Diverse Production Base\n",
|
||||
"10-Year Plan Cumulative Production (BBOE)\n",
|
||||
"\n",
|
||||
"| Region | Sub-region |\n",
|
||||
"|--------------|-----------------|\n",
|
||||
"| Lower 48 | Permian |\n",
|
||||
"| | Eagle Ford |\n",
|
||||
"| | Bakken |\n",
|
||||
"| | Other |\n",
|
||||
"| Alaska | GKA |\n",
|
||||
"| | GWA |\n",
|
||||
"| | GPA |\n",
|
||||
"| | WNS |\n",
|
||||
"| EMENA | Norway |\n",
|
||||
"| | Libya |\n",
|
||||
"| | Qatar |\n",
|
||||
"| Asia Pacific | APLNG |\n",
|
||||
"| | Malaysia |\n",
|
||||
"| | China |\n",
|
||||
"| Canada | Montney |\n",
|
||||
"| | Surmont |\n",
|
||||
"parsed_text: Our Differentiated Portfolio: Deep; Durable and Diverse\n",
|
||||
" 20 BBOE of Resource Diverse Production Base\n",
|
||||
" Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n",
|
||||
" S50 S32/BBL Lower 48 Alaska\n",
|
||||
" Average Cost of Supply\n",
|
||||
" 3 $40 GKA GWA\n",
|
||||
" GPA WNS\n",
|
||||
" $30 EMENA\n",
|
||||
" 3 Norway\n",
|
||||
" 8 $20\n",
|
||||
" E Qatar Libya\n",
|
||||
" Asia Pacific Canada\n",
|
||||
" $10 Permian\n",
|
||||
" APLNG Montney\n",
|
||||
" S0\n",
|
||||
" 10 15 20 Bakken\n",
|
||||
" Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n",
|
||||
" Lower 48 Canada Alaska EMENA Asia Pacific\n",
|
||||
"Costs assumemid-cycle price environment of S60/BBL WTI:\n",
|
||||
" ConocoPhillips\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(response.source_nodes[7].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d21d694b-6618-4d04-a6f6-8b0c2625f539",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ConocoPhillips has a diverse production base spanning several key geographies:\n",
|
||||
"\n",
|
||||
"1. **Lower 48 (United States)**: This includes major production areas such as the Permian Basin, Bakken Formation, and Eagle Ford Shale.\n",
|
||||
"2. **Alaska**: Significant operations in the North Slope region.\n",
|
||||
"3. **Canada**: Operations in the Montney Formation and the Surmont oil sands project.\n",
|
||||
"4. **EMENA (Europe, Middle East, and North Africa)**: Notable operations in Norway and Libya.\n",
|
||||
"5. **Asia Pacific**: Includes operations in Malaysia, China, and the Australia Pacific LNG (APLNG) project.\n",
|
||||
"6. **Qatar**: Involvement in the country's energy sector.\n",
|
||||
"\n",
|
||||
"These regions highlight the company's extensive and varied geographical footprint in the energy production industry.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(str(base_response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d3afccae-ad8d-4c5d-9d93-810dba413a5d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Our Differentiated Portfolio: Deep; Durable and Diverse\n",
|
||||
" 20 BBOE of Resource Diverse Production Base\n",
|
||||
" Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n",
|
||||
" S50 S32/BBL Lower 48 Alaska\n",
|
||||
" Average Cost of Supply\n",
|
||||
" 3 $40 GKA GWA\n",
|
||||
" GPA WNS\n",
|
||||
" $30 EMENA\n",
|
||||
" 3 Norway\n",
|
||||
" 8 $20\n",
|
||||
" E Qatar Libya\n",
|
||||
" Asia Pacific Canada\n",
|
||||
" $10 Permian\n",
|
||||
" APLNG Montney\n",
|
||||
" S0\n",
|
||||
" 10 15 20 Bakken\n",
|
||||
" Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n",
|
||||
" Lower 48 Canada Alaska EMENA Asia Pacific\n",
|
||||
"Costs assumemid-cycle price environment of S60/BBL WTI:\n",
|
||||
" ConocoPhillips\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(base_response.source_nodes[1].get_content(metadata_mode=\"all\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_index_v3",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_index_v3"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -15,7 +15,12 @@
|
||||
"source": [
|
||||
"This cookbook shows how to use LlamaParse and OpenAI's multimodal models to query over IKEA instruction manual PDFs, which mainly contain images and diagrams to show how one can assemble the product.\n",
|
||||
"\n",
|
||||
"LlamaParse and multimodal LLMs can interpret these diagrams and translate them into textual instructions. With textual assistance, confusing visual instructions within the IKEA product manuals can be made easier to understand and interpret. Additionally, textual instructions can be helpful for those who are visually impaired."
|
||||
"LlamaParse and multimodal LLMs can interpret these diagrams and translate them into textual instructions. With textual assistance, confusing visual instructions within the IKEA product manuals can be made easier to understand and interpret. Additionally, textual instructions can be helpful for those who are visually impaired.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-20-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,7 +29,7 @@
|
||||
"source": [
|
||||
"## Install and Setup\n",
|
||||
"\n",
|
||||
"Install LlamaIndex, download the data, and apply `nest_asyncio`."
|
||||
"Install LlamaIndex, download the data, and configure the API keys."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -33,7 +38,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index llama-parse llama-index-multi-modal-llms-openai git+https://github.com/openai/CLIP.git"
|
||||
"%pip install \"llama-index>=0.13.0<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,17 +52,6 @@
|
||||
"!rm data.zip"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -73,8 +67,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"<Your OpenAI API Key>\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"<Your LlamaCloud API Key>\""
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -84,13 +78,6 @@
|
||||
"## Code Implementation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up LlamaParse. We will parse the PDF files into markdown and use the GPT-4o multimodal model to parse the PDFs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -107,11 +94,11 @@
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"parser = LlamaParse(\n",
|
||||
" result_type=\"markdown\",\n",
|
||||
" parsing_instruction=\"You are given IKEA assembly instruction manuals\",\n",
|
||||
" use_vendor_multimodal_model=True,\n",
|
||||
" vendor_multimodal_model_name=\"openai-gpt4o\",\n",
|
||||
" show_progress=True,\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -147,18 +134,48 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 0%| | 0/5 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 0d3de1c0-e4c6-4cca-9e85-b738b301119a\n",
|
||||
"Started parsing the file under job_id 48ef73aa-fe6b-4e67-a4c0-ebe5d1fc532c\n",
|
||||
"Started parsing the file under job_id 71cdf344-d4c1-40ca-812c-3ada19aeca5a\n",
|
||||
"Started parsing the file under job_id 747a4847-7971-4e3b-87c5-6ce93a05c260\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 20%|██ | 1/5 [00:14<00:58, 14.62s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id a2a9fd6a-fa25-4410-8ccc-9da7d38e1590\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Getting job results: 100%|██████████| 5/5 [00:38<00:00, 7.78s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"md_json_objs = parser.get_json_result(files)\n",
|
||||
"md_json_list = md_json_objs[0][\"pages\"]\n",
|
||||
"image_dicts = parser.get_images(md_json_objs, download_path=\"data_images\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create helper functions to create a list of `TextNode`s from the markdown tables to feed into the `VectorStoreIndex`."
|
||||
"results = await parser.aparse(files)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,47 +184,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from pathlib import Path\n",
|
||||
"import typing as t\n",
|
||||
"from llama_index.core.schema import TextNode\n",
|
||||
"all_text_nodes = []\n",
|
||||
"\n",
|
||||
"for result in results:\n",
|
||||
" text_nodes = result.get_markdown_nodes(split_by_page=True)\n",
|
||||
" image_nodes = await result.aget_image_nodes(\n",
|
||||
" include_object_images=False,\n",
|
||||
" include_screenshot_images=True,\n",
|
||||
" image_download_dir=\"./data_images\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"def get_page_number(file_name):\n",
|
||||
" \"\"\"Gets page number of images using regex on file names\"\"\"\n",
|
||||
" match = re.search(r\"-page-(\\d+)\\.jpg$\", str(file_name))\n",
|
||||
" if match:\n",
|
||||
" return int(match.group(1))\n",
|
||||
" return 0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_sorted_image_files(image_dir):\n",
|
||||
" \"\"\"Get image files sorted by page.\"\"\"\n",
|
||||
" raw_files = [f for f in list(Path(image_dir).iterdir()) if f.is_file()]\n",
|
||||
" sorted_files = sorted(raw_files, key=get_page_number)\n",
|
||||
" return sorted_files\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_text_nodes(json_dicts, image_dir) -> t.List[TextNode]:\n",
|
||||
" \"\"\"Creates nodes from json + images\"\"\"\n",
|
||||
"\n",
|
||||
" nodes = []\n",
|
||||
"\n",
|
||||
" docs = [doc[\"md\"] for doc in json_dicts] # extract text\n",
|
||||
" image_files = _get_sorted_image_files(image_dir) # extract images\n",
|
||||
"\n",
|
||||
" for idx, doc in enumerate(docs):\n",
|
||||
" # adds both a text node and the corresponding image node (jpg of the page) for each page\n",
|
||||
" node = TextNode(\n",
|
||||
" text=doc,\n",
|
||||
" metadata={\"image_path\": str(image_files[idx]), \"page_num\": idx + 1},\n",
|
||||
" )\n",
|
||||
" nodes.append(node)\n",
|
||||
"\n",
|
||||
" return nodes\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"text_nodes = get_text_nodes(md_json_list, \"data_images\")"
|
||||
" for text_node, image_node in zip(text_nodes, image_nodes):\n",
|
||||
" text_node.metadata[\"image_path\"] = image_node.image_path\n",
|
||||
" all_text_nodes.append(text_node)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -225,34 +214,25 @@
|
||||
"source": [
|
||||
"from llama_index.core import (\n",
|
||||
" VectorStoreIndex,\n",
|
||||
" StorageContext,\n",
|
||||
" load_index_from_storage,\n",
|
||||
" Settings,\n",
|
||||
")\n",
|
||||
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
||||
"from llama_index.llms.openai import OpenAI\n",
|
||||
"\n",
|
||||
"embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\")\n",
|
||||
"llm = OpenAI(\"gpt-4o\")\n",
|
||||
"llm = OpenAI(\"gpt-5-mini\")\n",
|
||||
"\n",
|
||||
"Settings.llm = llm\n",
|
||||
"Settings.embed_model = embed_model\n",
|
||||
"\n",
|
||||
"if not os.path.exists(\"storage_ikea\"):\n",
|
||||
" index = VectorStoreIndex(text_nodes, embed_model=embed_model)\n",
|
||||
" index.storage_context.persist(persist_dir=\"./storage_ikea\")\n",
|
||||
"else:\n",
|
||||
" ctx = StorageContext.from_defaults(persist_dir=\"./storage_ikea\")\n",
|
||||
" index = load_index_from_storage(ctx)\n",
|
||||
"\n",
|
||||
"retriever = index.as_retriever()"
|
||||
"index = VectorStoreIndex(nodes=all_text_nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a custom query engine that uses GPT-4o's multimodal model."
|
||||
"Create a custom query engine that uses OpenAI for multi-modal response generation."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -263,77 +243,74 @@
|
||||
"source": [
|
||||
"from llama_index.core.query_engine import CustomQueryEngine\n",
|
||||
"from llama_index.core.retrievers import BaseRetriever\n",
|
||||
"from llama_index.multi_modal_llms.openai import OpenAIMultiModal\n",
|
||||
"from llama_index.core.schema import NodeWithScore, MetadataMode\n",
|
||||
"from llama_index.core.schema import MetadataMode\n",
|
||||
"from llama_index.core.base.response.schema import Response\n",
|
||||
"from llama_index.core.prompts import PromptTemplate\n",
|
||||
"from llama_index.core.schema import ImageNode\n",
|
||||
"from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock\n",
|
||||
"\n",
|
||||
"QA_PROMPT_TMPL = \"\"\"\\\n",
|
||||
"\n",
|
||||
"qa_prompt_block_text = \"\"\"\\\n",
|
||||
"Below we give parsed text from slides in two different formats, as well as the image.\n",
|
||||
"\n",
|
||||
"We parse the text in both 'markdown' mode as well as 'raw text' mode. Markdown mode attempts \\\n",
|
||||
"to convert relevant diagrams into tables, whereas raw text tries to maintain the rough spatial \\\n",
|
||||
"layout of the text.\n",
|
||||
"\n",
|
||||
"Use the image information first and foremost. ONLY use the text/markdown information \n",
|
||||
"if you can't understand the image.\n",
|
||||
"\n",
|
||||
"---------------------\n",
|
||||
"{context_str}\n",
|
||||
"---------------------\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"image_prefix_block = TextBlock(text=\"And here are the corresponding images per page\\n\")\n",
|
||||
"\n",
|
||||
"image_suffix = \"\"\"\\\n",
|
||||
"Given the context information and not prior knowledge, answer the query. Explain whether you got the answer\n",
|
||||
"from the parsed markdown or raw text or image, and if there's discrepancies, and your reasoning for the final answer.\n",
|
||||
"\n",
|
||||
"Query: {query_str}\n",
|
||||
"Answer: \"\"\"\n",
|
||||
"\n",
|
||||
"QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)\n",
|
||||
"\n",
|
||||
"gpt_4o_mm = OpenAIMultiModal(model=\"gpt-4o\", max_new_tokens=4096)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class MultimodalQueryEngine(CustomQueryEngine):\n",
|
||||
" qa_prompt: PromptTemplate\n",
|
||||
" retriever: BaseRetriever\n",
|
||||
" multi_modal_llm: OpenAIMultiModal\n",
|
||||
" \"\"\"Custom multimodal Query Engine.\n",
|
||||
"\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" qa_prompt: PromptTemplate,\n",
|
||||
" retriever: BaseRetriever,\n",
|
||||
" multi_modal_llm: OpenAIMultiModal,\n",
|
||||
" ):\n",
|
||||
" super().__init__(\n",
|
||||
" qa_prompt=qa_prompt, retriever=retriever, multi_modal_llm=multi_modal_llm\n",
|
||||
" )\n",
|
||||
" Takes in a retriever to retrieve a set of document nodes and respond using an LLM + retrieved text/images.\n",
|
||||
"\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" retriever: BaseRetriever\n",
|
||||
" llm: OpenAI\n",
|
||||
"\n",
|
||||
" def __init__(self, **kwargs) -> None:\n",
|
||||
" \"\"\"Initialize.\"\"\"\n",
|
||||
" super().__init__(**kwargs)\n",
|
||||
"\n",
|
||||
" def custom_query(self, query_str: str):\n",
|
||||
" # retrieve most relevant nodes\n",
|
||||
" # retrieve text nodes\n",
|
||||
" nodes = self.retriever.retrieve(query_str)\n",
|
||||
"\n",
|
||||
" # create image nodes from the image associated with those nodes\n",
|
||||
" image_nodes = [\n",
|
||||
" NodeWithScore(node=ImageNode(image_path=n.node.metadata[\"image_path\"]))\n",
|
||||
" # create ImageNode items from text nodes\n",
|
||||
" image_blocks = [\n",
|
||||
" ImageBlock(path=n.metadata[\"image_path\"])\n",
|
||||
" for n in nodes\n",
|
||||
" if n.metadata.get(\"image_path\")\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" # create context string from parsed markdown text\n",
|
||||
" ctx_str = \"\\n\\n\".join(\n",
|
||||
" [r.node.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]\n",
|
||||
" # create context string from text nodes, dump into the prompt\n",
|
||||
" context_str = \"\\n\\n\".join(\n",
|
||||
" [r.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]\n",
|
||||
" )\n",
|
||||
" # prompt for the LLM\n",
|
||||
" fmt_prompt = self.qa_prompt.format(context_str=ctx_str, query_str=query_str)\n",
|
||||
"\n",
|
||||
" # use the multimodal LLM to interpret images and generate a response to the prompt\n",
|
||||
" llm_repsonse = self.multi_modal_llm.complete(\n",
|
||||
" prompt=fmt_prompt,\n",
|
||||
" image_documents=[image_node.node for image_node in image_nodes],\n",
|
||||
" formatted_msg = ChatMessage(\n",
|
||||
" role=\"user\",\n",
|
||||
" blocks=[\n",
|
||||
" TextBlock(text=qa_prompt_block_text.format(context_str=context_str)),\n",
|
||||
" image_prefix_block,\n",
|
||||
" *image_blocks,\n",
|
||||
" TextBlock(text=image_suffix.format(query_str=query_str)),\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # synthesize an answer from formatted text and images\n",
|
||||
" llm_response = self.llm.chat([formatted_msg])\n",
|
||||
"\n",
|
||||
" return Response(\n",
|
||||
" response=str(llm_repsonse),\n",
|
||||
" response=str(llm_response.message.content),\n",
|
||||
" source_nodes=nodes,\n",
|
||||
" metadata={\"text_nodes\": text_nodes, \"image_nodes\": image_nodes},\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -351,9 +328,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_engine = MultimodalQueryEngine(\n",
|
||||
" qa_prompt=QA_PROMPT,\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=9),\n",
|
||||
" multi_modal_llm=gpt_4o_mm,\n",
|
||||
" retriever=index.as_retriever(similarity_top_k=3),\n",
|
||||
" llm=llm,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -373,9 +349,33 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The query asks about the parts included in the Uppspel, but the provided images and parsed text do not contain any information about the Uppspel. Instead, they contain information about other IKEA products such as SMÅGÖRA, FREDDE, and TUFFING.\n",
|
||||
"Answer (parts included in the UPPSPEL kit)\n",
|
||||
"\n",
|
||||
"Therefore, based on the provided images and parsed text, I cannot determine the parts included in the Uppspel. The answer cannot be derived from the given information."
|
||||
"I read the parts inventory diagram (image of the parts page). The parsed slide text only mentioned caster wheels and clips in the assembly steps, so the full parts list came from the image. The image is clear but some small part numbers are tiny; below I list the parts, quantities and the part numbers that are visible.\n",
|
||||
"\n",
|
||||
"- 2x long screws (107603) \n",
|
||||
"- 6x large screws/dowels (100214) \n",
|
||||
"- 5x cam screws / binding-post screws (118331) \n",
|
||||
"- 12x threaded connector dowels / cross dowels (100498) \n",
|
||||
"- 4x cylindrical spacers (106986) \n",
|
||||
"- 2x ribbed wooden dowels (101350) \n",
|
||||
"- 4x small screws (100413) \n",
|
||||
"- 4x hex/Allen-head screws (100181) \n",
|
||||
"- 2x wall plugs (111322) \n",
|
||||
"- 2x short screws (109067) \n",
|
||||
"- 12x small wood screws (109560) \n",
|
||||
"- 17x cam lock nuts (102534) \n",
|
||||
"- 4x oval/cover caps (135049 / FRE001) \n",
|
||||
"- 2x metal brackets / wall-mount plates (128985) \n",
|
||||
"- 4x mushroom-shaped plastic pegs / feet (128409 / 128303) \n",
|
||||
"- 1x small Allen key (100001) \n",
|
||||
"- 2x larger Allen keys (108490) \n",
|
||||
"- 2x round shallow plastic bowls (123602 / 123603) \n",
|
||||
"- 2x round deeper plastic bowls (126873 / FRE002)\n",
|
||||
"\n",
|
||||
"Notes / discrepancies:\n",
|
||||
"- The parsed text (markdown) included only partial info (mentions of caster wheels and clips) and did not contain the full inventory. The complete inventory above was taken from the parts-diagram image. \n",
|
||||
"- Some part numbers on the image are very small and I transcribed them as best as they appear; a few numbers may be slightly off due to image resolution."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -400,9 +400,13 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The Tuffing is a bunk bed frame with a minimalist design, featuring a metal frame and safety rails on the top bunk. The image provided shows the Tuffing bunk bed with a ladder for access to the top bunk and a simple, sturdy construction.\n",
|
||||
"Answer: According to the parsed page text, the Tuffing is depicted as a bunk bed — a simple metal‑frame bunk with safety rails on the top bunk and a ladder in the middle (IKEA logo at the bottom right).\n",
|
||||
"\n",
|
||||
"I got the answer from the image provided. The image clearly shows the design and structure of the Tuffing bunk bed. There were no discrepancies between the parsed markdown or raw text and the image. The image was the primary source for understanding what the Tuffing looks like."
|
||||
"Where I got this:\n",
|
||||
"- Primary source for the description: the parsed markdown/alt‑text for page 1, which explicitly describes the bunk bed.\n",
|
||||
"\n",
|
||||
"Discrepancies / notes:\n",
|
||||
"- The actual image shown in the attached files (the large drawing with the big FREDDE title) is a different IKEA product (a desk with raised shelves), not the bunk bed described in the parsed text. Page 18’s parsed text shows a person fitting a fabric/mesh over a rectangular frame, and page 37 is a blank/credits page. Because the visual files and the parsed descriptions conflict, I relied on the parsed markdown description for the answer but there is uncertainty — the raw image content does not match that description."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -425,14 +429,11 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"The query asks for step 4 of assembling the Nordli. Based on the provided information, step 4 is described in the parsed text as follows:\n",
|
||||
"Step 4: Use 4x screws (part numbers 118331 and 112996) to attach the two panels as shown. Insert the screws into the indicated holes and tighten with a screwdriver.\n",
|
||||
"\n",
|
||||
"**Step 4:**\n",
|
||||
"- Insert the provided tool into the hole as shown.\n",
|
||||
"- Ensure the structure is properly aligned and secure.\n",
|
||||
"- Push down firmly to lock the structure in place.\n",
|
||||
"\n",
|
||||
"This information was derived from the parsed text, as the image provided does not contain step-by-step instructions for the Nordli assembly. There are no discrepancies between the parsed markdown and raw text for this step."
|
||||
"Source and notes:\n",
|
||||
"- This answer comes from the parsed text for page 6 (the raw parsed instructions).\n",
|
||||
"- The accompanying image for page 6, however, shows a close-up of inserting/rotating a cylindrical cam/dowel (labelled 106986), which doesn't visually match the parsed text's described screws/part numbers. Because you asked me to use only the provided context, I reported the parsed-text instruction as step 4 and noted the image/text discrepancy above."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -455,7 +456,9 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"If you're confused with reading the manual, you should contact IKEA customer service for assistance. This information is derived from the image on page 2, which shows a person with a question mark next to an IKEA box and another person making a phone call to IKEA. This visual cue indicates that contacting IKEA customer service is the recommended action if you need help."
|
||||
"Answer: Call IKEA for help (use the phone number on the manual or contact your local IKEA store).\n",
|
||||
"\n",
|
||||
"Source & reasoning: I read the parsed page text and inspected the image. Both show a confused person with a question mark, then a second panel of a person on the phone holding the instructions with an IKEA store in the background — indicating you should call IKEA. The three parsed variants (smagora, tuffing, uppspel) and the raw image all agree on this instruction, so there are no meaningful discrepancies."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -471,349 +474,11 @@
|
||||
")\n",
|
||||
"display(Markdown(str(response)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also create an agent around the query engine and chat with the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_index.core.agent import FunctionCallingAgentWorker\n",
|
||||
"from llama_index.core.tools import QueryEngineTool\n",
|
||||
"\n",
|
||||
"query_engine_tool = QueryEngineTool.from_defaults(\n",
|
||||
" query_engine=query_engine,\n",
|
||||
" name=\"query_engine_tool\",\n",
|
||||
" description=\"Useful for retrieving specific context from the data. Do NOT select if question asks for a summary of the data.\",\n",
|
||||
")\n",
|
||||
"agent = FunctionCallingAgentWorker.from_tools(\n",
|
||||
" [query_engine_tool], llm=llm, verbose=True\n",
|
||||
").as_agent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Added user message to memory: Give a step-by-step instruction guide on how to assemble the Smagora\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: query_engine_tool with args: {\"input\": \"step-by-step instruction guide on how to assemble the Smagora\"}\n",
|
||||
"=== Function Output ===\n",
|
||||
"The step-by-step instruction guide on how to assemble the Smågåra crib is provided in the images. The images show detailed visual instructions for each step of the assembly process, including the tools required, the parts involved, and the specific actions to be taken.\n",
|
||||
"\n",
|
||||
"Here is a summary of the steps based on the images:\n",
|
||||
"\n",
|
||||
"1. **Tools Required**:\n",
|
||||
" - Flathead screwdriver\n",
|
||||
" - Phillips screwdriver\n",
|
||||
" - Hammer\n",
|
||||
"\n",
|
||||
"2. **Preparation**:\n",
|
||||
" - Do not assemble alone; assemble with a partner.\n",
|
||||
" - Do not assemble on a hard surface; use a soft surface to avoid damage.\n",
|
||||
" - If you have questions or need assistance, contact IKEA customer service.\n",
|
||||
"\n",
|
||||
"3. **Step 1**:\n",
|
||||
" - Insert 12 screws into the designated holes on the frame.\n",
|
||||
"\n",
|
||||
"4. **Step 2**:\n",
|
||||
" - Align the side panels with the headboard and footboard.\n",
|
||||
" - Use 4 connectors and secure them with bolts and washers.\n",
|
||||
" - Tighten using the provided tool.\n",
|
||||
" - Carefully flip the structure as shown.\n",
|
||||
"\n",
|
||||
"5. **Step 3**:\n",
|
||||
" - Use the provided Allen key to tighten the screws into the designated holes.\n",
|
||||
" - Ensure the screws are properly aligned and tightened.\n",
|
||||
" - Repeat this process for all four screws.\n",
|
||||
" - Make sure the screws are flush with the surface.\n",
|
||||
"\n",
|
||||
"6. **Step 4**:\n",
|
||||
" - Insert the provided tool into the hole as shown.\n",
|
||||
" - Ensure the structure is properly aligned and secure.\n",
|
||||
" - Push down firmly to lock the structure in place.\n",
|
||||
"\n",
|
||||
"7. **Step 5**:\n",
|
||||
" - Insert 4 dowels into the designated holes on the board.\n",
|
||||
"\n",
|
||||
"8. **Step 6**:\n",
|
||||
" - Align the board with the dowels and insert it into the corresponding slots on the frame.\n",
|
||||
"\n",
|
||||
"9. **Step 7**:\n",
|
||||
" - Insert the top panel into the side panels.\n",
|
||||
" - Use 4 screws to secure the top panel.\n",
|
||||
" - Ensure the screws are properly aligned and tightened using the provided tool.\n",
|
||||
"\n",
|
||||
"10. **Step 8**:\n",
|
||||
" - Carefully flip the assembled structure upright.\n",
|
||||
" - Use 2 screws to secure the bottom panel.\n",
|
||||
" - Tighten the screws with the provided tool.\n",
|
||||
"\n",
|
||||
"These steps are derived from the images provided, which offer a clear and detailed visual guide for assembling the Smågåra crib.\n",
|
||||
"=== LLM Response ===\n",
|
||||
"Here is a step-by-step instruction guide on how to assemble the Smågåra crib:\n",
|
||||
"\n",
|
||||
"### Tools Required:\n",
|
||||
"- Flathead screwdriver\n",
|
||||
"- Phillips screwdriver\n",
|
||||
"- Hammer\n",
|
||||
"- Allen key (provided in the package)\n",
|
||||
"\n",
|
||||
"### Preparation:\n",
|
||||
"- **Safety First**: Assemble with a partner to ensure safety and ease.\n",
|
||||
"- **Surface**: Assemble on a soft surface to avoid damaging the parts.\n",
|
||||
"- **Assistance**: If you have questions or need help, contact IKEA customer service.\n",
|
||||
"\n",
|
||||
"### Step-by-Step Assembly:\n",
|
||||
"\n",
|
||||
"#### Step 1: Insert Screws into the Frame\n",
|
||||
"1. Insert 12 screws into the designated holes on the frame.\n",
|
||||
"2. Ensure the screws are properly aligned.\n",
|
||||
"\n",
|
||||
"#### Step 2: Align and Secure Side Panels\n",
|
||||
"1. Align the side panels with the headboard and footboard.\n",
|
||||
"2. Use 4 connectors and secure them with bolts and washers.\n",
|
||||
"3. Tighten the bolts using the provided tool.\n",
|
||||
"4. Carefully flip the structure as shown in the instructions.\n",
|
||||
"\n",
|
||||
"#### Step 3: Tighten Screws\n",
|
||||
"1. Use the provided Allen key to tighten the screws into the designated holes.\n",
|
||||
"2. Ensure the screws are properly aligned and tightened.\n",
|
||||
"3. Repeat this process for all four screws.\n",
|
||||
"4. Make sure the screws are flush with the surface.\n",
|
||||
"\n",
|
||||
"#### Step 4: Lock the Structure\n",
|
||||
"1. Insert the provided tool into the hole as shown.\n",
|
||||
"2. Ensure the structure is properly aligned and secure.\n",
|
||||
"3. Push down firmly to lock the structure in place.\n",
|
||||
"\n",
|
||||
"#### Step 5: Insert Dowels\n",
|
||||
"1. Insert 4 dowels into the designated holes on the board.\n",
|
||||
"\n",
|
||||
"#### Step 6: Align and Insert the Board\n",
|
||||
"1. Align the board with the dowels.\n",
|
||||
"2. Insert the board into the corresponding slots on the frame.\n",
|
||||
"\n",
|
||||
"#### Step 7: Secure the Top Panel\n",
|
||||
"1. Insert the top panel into the side panels.\n",
|
||||
"2. Use 4 screws to secure the top panel.\n",
|
||||
"3. Ensure the screws are properly aligned and tightened using the provided tool.\n",
|
||||
"\n",
|
||||
"#### Step 8: Secure the Bottom Panel\n",
|
||||
"1. Carefully flip the assembled structure upright.\n",
|
||||
"2. Use 2 screws to secure the bottom panel.\n",
|
||||
"3. Tighten the screws with the provided tool.\n",
|
||||
"\n",
|
||||
"By following these steps, you should be able to assemble the Smågåra crib successfully. If you encounter any issues, refer to the visual instructions provided in the package or contact IKEA customer service for assistance.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"Here is a step-by-step instruction guide on how to assemble the Smågåra crib:\n",
|
||||
"\n",
|
||||
"### Tools Required:\n",
|
||||
"- Flathead screwdriver\n",
|
||||
"- Phillips screwdriver\n",
|
||||
"- Hammer\n",
|
||||
"- Allen key (provided in the package)\n",
|
||||
"\n",
|
||||
"### Preparation:\n",
|
||||
"- **Safety First**: Assemble with a partner to ensure safety and ease.\n",
|
||||
"- **Surface**: Assemble on a soft surface to avoid damaging the parts.\n",
|
||||
"- **Assistance**: If you have questions or need help, contact IKEA customer service.\n",
|
||||
"\n",
|
||||
"### Step-by-Step Assembly:\n",
|
||||
"\n",
|
||||
"#### Step 1: Insert Screws into the Frame\n",
|
||||
"1. Insert 12 screws into the designated holes on the frame.\n",
|
||||
"2. Ensure the screws are properly aligned.\n",
|
||||
"\n",
|
||||
"#### Step 2: Align and Secure Side Panels\n",
|
||||
"1. Align the side panels with the headboard and footboard.\n",
|
||||
"2. Use 4 connectors and secure them with bolts and washers.\n",
|
||||
"3. Tighten the bolts using the provided tool.\n",
|
||||
"4. Carefully flip the structure as shown in the instructions.\n",
|
||||
"\n",
|
||||
"#### Step 3: Tighten Screws\n",
|
||||
"1. Use the provided Allen key to tighten the screws into the designated holes.\n",
|
||||
"2. Ensure the screws are properly aligned and tightened.\n",
|
||||
"3. Repeat this process for all four screws.\n",
|
||||
"4. Make sure the screws are flush with the surface.\n",
|
||||
"\n",
|
||||
"#### Step 4: Lock the Structure\n",
|
||||
"1. Insert the provided tool into the hole as shown.\n",
|
||||
"2. Ensure the structure is properly aligned and secure.\n",
|
||||
"3. Push down firmly to lock the structure in place.\n",
|
||||
"\n",
|
||||
"#### Step 5: Insert Dowels\n",
|
||||
"1. Insert 4 dowels into the designated holes on the board.\n",
|
||||
"\n",
|
||||
"#### Step 6: Align and Insert the Board\n",
|
||||
"1. Align the board with the dowels.\n",
|
||||
"2. Insert the board into the corresponding slots on the frame.\n",
|
||||
"\n",
|
||||
"#### Step 7: Secure the Top Panel\n",
|
||||
"1. Insert the top panel into the side panels.\n",
|
||||
"2. Use 4 screws to secure the top panel.\n",
|
||||
"3. Ensure the screws are properly aligned and tightened using the provided tool.\n",
|
||||
"\n",
|
||||
"#### Step 8: Secure the Bottom Panel\n",
|
||||
"1. Carefully flip the assembled structure upright.\n",
|
||||
"2. Use 2 screws to secure the bottom panel.\n",
|
||||
"3. Tighten the screws with the provided tool.\n",
|
||||
"\n",
|
||||
"By following these steps, you should be able to assemble the Smågåra crib successfully. If you encounter any issues, refer to the visual instructions provided in the package or contact IKEA customer service for assistance."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = agent.chat(\n",
|
||||
" \"Give a step-by-step instruction guide on how to assemble the Smagora\"\n",
|
||||
")\n",
|
||||
"display(Markdown(str(response)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Added user message to memory: How do I assemble the Fredde?\n",
|
||||
"=== Calling Function ===\n",
|
||||
"Calling function: query_engine_tool with args: {\"input\": \"step-by-step instruction guide on how to assemble the Fredde\"}\n",
|
||||
"=== Function Output ===\n",
|
||||
"The query asks for a step-by-step instruction guide on how to assemble the Fredde. However, based on the provided images and parsed text, there is no specific mention or visual representation of the Fredde assembly instructions. The images and text provided are related to other IKEA products such as Tuffing and Smågöra, but not Fredde.\n",
|
||||
"\n",
|
||||
"Therefore, I cannot provide the step-by-step instructions for assembling the Fredde from the given information. If you have the specific instructions for Fredde, please provide them, and I can assist you further.\n",
|
||||
"=== LLM Response ===\n",
|
||||
"It appears that the specific step-by-step instructions for assembling the Fredde desk are not available in the provided data. However, I can offer a general guide based on typical assembly procedures for IKEA furniture. For the most accurate and detailed instructions, please refer to the assembly manual that comes with the product.\n",
|
||||
"\n",
|
||||
"### General Assembly Guide for Fredde Desk:\n",
|
||||
"\n",
|
||||
"#### Tools Required:\n",
|
||||
"- Phillips screwdriver\n",
|
||||
"- Flathead screwdriver\n",
|
||||
"- Allen key (usually provided in the package)\n",
|
||||
"- Hammer (if needed for dowels)\n",
|
||||
"\n",
|
||||
"### Step-by-Step Assembly:\n",
|
||||
"\n",
|
||||
"#### Step 1: Unpack and Organize\n",
|
||||
"1. **Unpack** all the parts and hardware.\n",
|
||||
"2. **Organize** the parts by type and size to make the assembly process easier.\n",
|
||||
"\n",
|
||||
"#### Step 2: Assemble the Main Frame\n",
|
||||
"1. **Connect the Side Panels**: Attach the side panels to the back panel using screws and dowels as indicated in the manual.\n",
|
||||
"2. **Secure the Bottom Panel**: Attach the bottom panel to the side panels.\n",
|
||||
"\n",
|
||||
"#### Step 3: Attach the Shelves\n",
|
||||
"1. **Install the Lower Shelves**: Insert the lower shelves into the designated slots and secure them with screws.\n",
|
||||
"2. **Install the Upper Shelves**: Repeat the process for the upper shelves.\n",
|
||||
"\n",
|
||||
"#### Step 4: Attach the Desktop\n",
|
||||
"1. **Align the Desktop**: Place the desktop on top of the frame, ensuring it is properly aligned.\n",
|
||||
"2. **Secure the Desktop**: Use screws to secure the desktop to the frame.\n",
|
||||
"\n",
|
||||
"#### Step 5: Install Additional Features\n",
|
||||
"1. **Attach Monitor Shelf**: If the Fredde desk includes a monitor shelf, attach it to the back panel using screws.\n",
|
||||
"2. **Install Side Extensions**: Attach any side extensions or additional shelves as per the instructions.\n",
|
||||
"\n",
|
||||
"#### Step 6: Final Adjustments\n",
|
||||
"1. **Check Stability**: Ensure all screws are tightened and the desk is stable.\n",
|
||||
"2. **Adjust Height**: If the desk has adjustable height features, set it to the desired height.\n",
|
||||
"\n",
|
||||
"#### Step 7: Clean Up\n",
|
||||
"1. **Remove Packaging**: Dispose of any packaging materials.\n",
|
||||
"2. **Organize Tools**: Put away your tools and clean the workspace.\n",
|
||||
"\n",
|
||||
"For the most accurate and detailed instructions, please refer to the assembly manual that comes with the Fredde desk. If you encounter any issues, IKEA customer service can provide additional support.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"It appears that the specific step-by-step instructions for assembling the Fredde desk are not available in the provided data. However, I can offer a general guide based on typical assembly procedures for IKEA furniture. For the most accurate and detailed instructions, please refer to the assembly manual that comes with the product.\n",
|
||||
"\n",
|
||||
"### General Assembly Guide for Fredde Desk:\n",
|
||||
"\n",
|
||||
"#### Tools Required:\n",
|
||||
"- Phillips screwdriver\n",
|
||||
"- Flathead screwdriver\n",
|
||||
"- Allen key (usually provided in the package)\n",
|
||||
"- Hammer (if needed for dowels)\n",
|
||||
"\n",
|
||||
"### Step-by-Step Assembly:\n",
|
||||
"\n",
|
||||
"#### Step 1: Unpack and Organize\n",
|
||||
"1. **Unpack** all the parts and hardware.\n",
|
||||
"2. **Organize** the parts by type and size to make the assembly process easier.\n",
|
||||
"\n",
|
||||
"#### Step 2: Assemble the Main Frame\n",
|
||||
"1. **Connect the Side Panels**: Attach the side panels to the back panel using screws and dowels as indicated in the manual.\n",
|
||||
"2. **Secure the Bottom Panel**: Attach the bottom panel to the side panels.\n",
|
||||
"\n",
|
||||
"#### Step 3: Attach the Shelves\n",
|
||||
"1. **Install the Lower Shelves**: Insert the lower shelves into the designated slots and secure them with screws.\n",
|
||||
"2. **Install the Upper Shelves**: Repeat the process for the upper shelves.\n",
|
||||
"\n",
|
||||
"#### Step 4: Attach the Desktop\n",
|
||||
"1. **Align the Desktop**: Place the desktop on top of the frame, ensuring it is properly aligned.\n",
|
||||
"2. **Secure the Desktop**: Use screws to secure the desktop to the frame.\n",
|
||||
"\n",
|
||||
"#### Step 5: Install Additional Features\n",
|
||||
"1. **Attach Monitor Shelf**: If the Fredde desk includes a monitor shelf, attach it to the back panel using screws.\n",
|
||||
"2. **Install Side Extensions**: Attach any side extensions or additional shelves as per the instructions.\n",
|
||||
"\n",
|
||||
"#### Step 6: Final Adjustments\n",
|
||||
"1. **Check Stability**: Ensure all screws are tightened and the desk is stable.\n",
|
||||
"2. **Adjust Height**: If the desk has adjustable height features, set it to the desired height.\n",
|
||||
"\n",
|
||||
"#### Step 7: Clean Up\n",
|
||||
"1. **Remove Packaging**: Dispose of any packaging materials.\n",
|
||||
"2. **Organize Tools**: Put away your tools and clean the workspace.\n",
|
||||
"\n",
|
||||
"For the most accurate and detailed instructions, please refer to the assembly manual that comes with the Fredde desk. If you encounter any issues, IKEA customer service can provide additional support."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = agent.chat(\"How do I assemble the Fredde?\")\n",
|
||||
"display(Markdown(str(response)))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama-parse-5ZmnAQ0r-py3.11",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@@ -11,21 +11,22 @@
|
||||
"\n",
|
||||
"We use LlamaParse to load in our slides in .pptx format, and use LlamaIndex to build a RAG pipeline over these files.\n",
|
||||
"\n",
|
||||
"**NOTE**: LlamaParse is capable of image extraction through JSON mode, in this notebook we stick with text."
|
||||
"**NOTE**: LlamaParse is capable of image extraction through JSON mode, in this notebook we stick with text.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb-2025 | N/A | Deprecated |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14cdcfaf-88b4-4489-9910-e362e0ccec53",
|
||||
"id": "bbd1a042",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"from llama_cloud_services import LlamaParse"
|
||||
"%pip install \"llama-index>=0.13.0<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,7 +38,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-\""
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -369,9 +371,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llama_parse",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llama_parse"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -6,7 +6,12 @@
|
||||
"source": [
|
||||
"# LlamaParse - Parsing Financial Powerpoints 📊\n",
|
||||
"\n",
|
||||
"In this cookbook we show you how to use LlamaParse to parse a financial powerpoint."
|
||||
"In this cookbook we show you how to use LlamaParse to parse a financial powerpoint.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb-2025 | N/A | Deprecated |"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -12,6 +12,11 @@
|
||||
"\n",
|
||||
"These instructions can be useful for improving the parser's performance on complex document layouts, extracting data in a specific format, or transforming the document in other ways.\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-20-2025 | 0.6.61 | Maintained |\n",
|
||||
"\n",
|
||||
"### Why This Matters:\n",
|
||||
"Traditional document parsing can be rigid and error-prone, often missing crucial context and nuances in complex layouts. Our instruction-based parsing allows you to:\n",
|
||||
"\n",
|
||||
@@ -21,15 +26,7 @@
|
||||
"4. Save hours of manual data entry and verification\n",
|
||||
"5. Reduce errors in document processing workflows\n",
|
||||
"\n",
|
||||
"In this demonstration, we showcase how parsing instructions can be used to extract specific information from unstructured documents. Below are the documents we use for testing:\n",
|
||||
"\n",
|
||||
"1. McDonald's Receipt - Extracting the price of each order and the final amount to be paid.\n",
|
||||
"\n",
|
||||
"2. Expense Report Document - Extracting employee name, employee ID, position, department, date ranges, individual expense items with dates, categories, and amounts.\n",
|
||||
"\n",
|
||||
"3. Purchase Order Document - Identifying the PO number, vendor details, shipping terms, and an itemized list of products with quantities and unit prices.\n",
|
||||
"\n",
|
||||
"Let's jump into these real-world examples and see how parsing instructions can help us extract specific information."
|
||||
"In this demonstration, we showcase how parsing instructions can be used to extract specific information from unstructured documents. Using a McDonald's Receipt, we show how to ignore parts of the document and only parse the price of each order and the final amount to be paid."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,7 +42,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-cloud-services"
|
||||
"%pip install llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -61,10 +58,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\""
|
||||
@@ -95,136 +88,67 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 66643b81-e2f4-408b-890b-8e116472210b\n"
|
||||
"Started parsing the file under job_id 31862c97-ac1b-46ed-b5b7-42ca4d0ffe70\n",
|
||||
"\n",
|
||||
"# McDonald's Receipt\n",
|
||||
"\n",
|
||||
"> Rate us HIGHLY SATISFIED and \n",
|
||||
"> Receive ONE FREE ITEM \n",
|
||||
"> Purchase any sandwich and receive an \n",
|
||||
"> item of equal or lesser value \n",
|
||||
"> Go to www.mcdvoice.com within 7 days \n",
|
||||
"> and tell us about your visit. \n",
|
||||
"> Validation Code: \n",
|
||||
"> Expires 30 days after receipt date. \n",
|
||||
"> Valid at participating US McDonald's. \n",
|
||||
"\n",
|
||||
"**Survey Code:** \n",
|
||||
"31278-01121-21018-20481-00081-0 \n",
|
||||
"\n",
|
||||
"**McDonald's Restaurant #31278** \n",
|
||||
"2378 PINE RD NW \n",
|
||||
"RICE, MN 56367-9740 \n",
|
||||
"TEL# 320 393 4600 \n",
|
||||
"\n",
|
||||
"| KS# | Date | Time | Order |\n",
|
||||
"|------|------------|---------|--------|\n",
|
||||
"| 1 | 12/08/2022 | 08:48 PM| 12 |\n",
|
||||
"\n",
|
||||
"| Item | Price |\n",
|
||||
"|--------------------------|-------|\n",
|
||||
"| 1 Happy Meal 6 Pc | 4.89 |\n",
|
||||
"| - Creamy Ranch Cup | |\n",
|
||||
"| - Extra Kids Fry | |\n",
|
||||
"| - Wreck It Ralph 2 | |\n",
|
||||
"| - S Coke | |\n",
|
||||
"| 1 Snack Oreo McFlurry | 2.69 |\n",
|
||||
"\n",
|
||||
"| Subtotal | 7.58 |\n",
|
||||
"| Tax | 0.52 |\n",
|
||||
"| Take-Out Total | 8.10 |\n",
|
||||
"| Cash Tendered | 10.00 |\n",
|
||||
"| Change | 1.90 |\n",
|
||||
"\n",
|
||||
"McDonald's Restaurant Rice \n",
|
||||
"***NOW ACCEPTING APPLICATIONS*** \n",
|
||||
"text to #36453 \n",
|
||||
"apply31278 \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_cloud_services import LlamaParse\n",
|
||||
"\n",
|
||||
"vanilaParsing = LlamaParse(result_type=\"markdown\").load_data(\"./mcdonalds_receipt.png\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# Rate us HIGHLY SATISFIED\n",
|
||||
"\n",
|
||||
"Purchase any sandwich and receive a FREE ITEM\n",
|
||||
"\n",
|
||||
"Go to WWW.mcdvoice.com within 7 days of purchase of equal or lesser value and tell us about your visit.\n",
|
||||
"\n",
|
||||
"Validation Code: 31278-01121-21018-20481-00081-0\n",
|
||||
"\n",
|
||||
"Valid at participating US McDonald's\n",
|
||||
"\n",
|
||||
"Expires 30 days after receipt date\n",
|
||||
"\n",
|
||||
"# McDonald's Restaurant #312782378\n",
|
||||
"\n",
|
||||
"PINE RD NW\n",
|
||||
"\n",
|
||||
"RICE MN 56367-9740\n",
|
||||
"\n",
|
||||
"TEL# 320 393 4600\n",
|
||||
"\n",
|
||||
"KS# 12/08/2022 08:48 PM\n",
|
||||
"\n",
|
||||
"# Order\n",
|
||||
"\n",
|
||||
"|Happy Meal 6 Pc|$4.89|\n",
|
||||
"|---|---|\n",
|
||||
"|Creamy Ranch Cup| |\n",
|
||||
"|Extra Kids Fry| |\n",
|
||||
"|Wreck It Ralph 2 Snack| |\n",
|
||||
"|Oreo McFlurry|$2.69|\n",
|
||||
"\n",
|
||||
"# Summary\n",
|
||||
"\n",
|
||||
"|Subtotal|$7.58|\n",
|
||||
"|---|---|\n",
|
||||
"|Tax|$0.52|\n",
|
||||
"|Take-Out Total|$8.10|\n",
|
||||
"|Cash Tendered|$10.00|\n",
|
||||
"|Change|$1.90|\n",
|
||||
"\n",
|
||||
"### Not ACCEPTING APPLICATIONS *++ McDonald's Restaurant Rice\n",
|
||||
"\n",
|
||||
"Text to #36453 apply 31278\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vanilaParsing[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 1a04fdbb-5415-4a36-a1bd-26bfb5d618fa\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parsingInstruction = \"\"\"The provided document is a McDonald's receipt.\n",
|
||||
" Provide the price of each order and final amount to be paid.\"\"\"\n",
|
||||
"withInstructionParsing = LlamaParse(\n",
|
||||
" result_type=\"markdown\", parsing_instruction=parsingInstruction\n",
|
||||
").load_data(\"./mcdonalds_receipt.png\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Here are the prices for each order from the McDonald's receipt:\n",
|
||||
"\n",
|
||||
"1. Happy Meal 6 Pc: $4.89\n",
|
||||
"2. Snack Oreo McFlurry: $2.69\n",
|
||||
"\n",
|
||||
"**Subtotal:** $7.58\n",
|
||||
"**Tax:** $0.52\n",
|
||||
"**Total Amount to be Paid:** $8.10\n",
|
||||
"\n",
|
||||
"The cash tendered was $10.00, and the change given was $1.90.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(withInstructionParsing[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Expense Report Document\n",
|
||||
"vanilla_result = await LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
").aparse(\"./mcdonalds_receipt.png\")\n",
|
||||
"\n",
|
||||
"Here we extract employee name, employee ID, position, department, date ranges, individual expense items with dates, categories, and amounts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"expense_report_document.png\" alt=\"Alt Text\" width=\"500\">"
|
||||
"print(vanilla_result.pages[0].md)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -236,354 +160,44 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id b6bcc6e1-7d30-4522-9abd-ace196781a70\n"
|
||||
"Started parsing the file under job_id 3f4dcd5a-2ef0-4022-9bd3-a85df9ec7664\n",
|
||||
"\n",
|
||||
"* Happy Meal 6 Pc 4.89 \n",
|
||||
" - Creamy Ranch Cup \n",
|
||||
" - Extra Kids Fry \n",
|
||||
" - Wreck It Ralph 2 \n",
|
||||
" - S Coke \n",
|
||||
"* Snack Oreo McFlurry 2.69 \n",
|
||||
"\n",
|
||||
"Subtotal 7.58 \n",
|
||||
"Tax 0.52 \n",
|
||||
"Take-Out Total 8.10 \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vanilaParsing = LlamaParse(result_type=\"markdown\").load_data(\n",
|
||||
" \"./expense_report_document.pdf\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# QUANTUM DYNAMICS CORPORATION\n",
|
||||
"\n",
|
||||
"# EMPLOYEE EXPENSE REPORT\n",
|
||||
"\n",
|
||||
"# FISCAL YEAR 2024\n",
|
||||
"\n",
|
||||
"# EMPLOYEE INFORMATION:\n",
|
||||
"\n",
|
||||
"Name: Dr. Alexandra Chen-Martinez, PhD\n",
|
||||
"\n",
|
||||
"Employee ID: QD-2022-1457\n",
|
||||
"\n",
|
||||
"Department: Advanced Research & Development\n",
|
||||
"\n",
|
||||
"Cost Center: CC-ARD-NA-003\n",
|
||||
"\n",
|
||||
"Project Codes: QD-QUANTUM-2024-01, QD-AI-2024-03\n",
|
||||
"\n",
|
||||
"Position: Principal Research Scientist\n",
|
||||
"\n",
|
||||
"Reporting Manager: Dr. James Thompson\n",
|
||||
"\n",
|
||||
"# TRIP/EXPENSE PERIOD:\n",
|
||||
"\n",
|
||||
"Start Date: November 15, 2024\n",
|
||||
"\n",
|
||||
"End Date: December 10, 2024\n",
|
||||
"\n",
|
||||
"Purpose: International Conference Attendance & Client Meetings\n",
|
||||
"\n",
|
||||
"Locations: Tokyo, Japan → Singapore → Sydney, Australia\n",
|
||||
"\n",
|
||||
"# CURRENCY CONVERSION RATES APPLIED:\n",
|
||||
"\n",
|
||||
"JPY (¥) → USD: 0.0068 (as of 11/15/2024)\n",
|
||||
"\n",
|
||||
"SGD (S$) → USD: 0.74 (as of 11/28/2024)\n",
|
||||
"\n",
|
||||
"AUD (A$) → USD: 0.65 (as of 12/03/2024)\n",
|
||||
"\n",
|
||||
"# ITEMIZED EXPENSES:\n",
|
||||
"\n",
|
||||
"|Date|Category|Description|Original|Currency|USD|\n",
|
||||
"|---|---|---|---|---|---|\n",
|
||||
"|11/15/2024|Transportation|JFK → NRT Business Class|4,250.00|USD|4,250.00|\n",
|
||||
"|Booking Ref: QF78956 - Corporate Rate Applied|Booking Ref: QF78956 - Corporate Rate Applied|Booking Ref: QF78956 - Corporate Rate Applied|Booking Ref: QF78956 - Corporate Rate Applied|Booking Ref: QF78956 - Corporate Rate Applied|Booking Ref: QF78956 - Corporate Rate Applied|\n",
|
||||
"|Project Code: QD-QUANTUM-2024-01|Project Code: QD-QUANTUM-2024-01|Project Code: QD-QUANTUM-2024-01|Project Code: QD-QUANTUM-2024-01|Project Code: QD-QUANTUM-2024-01|Project Code: QD-QUANTUM-2024-01|\n",
|
||||
"|11/16/2024|Accommodation|Hilton Tokyo - 5 nights|225,000|JPY|1,530.00|\n",
|
||||
"|Confirmation: HTK-2024-78956|Confirmation: HTK-2024-78956|Confirmation: HTK-2024-78956|Confirmation: HTK-2024-78956|Confirmation: HTK-2024-78956|Confirmation: HTK-2024-78956|\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vanilaParsing[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id 7b0d05bb-947b-4475-8d0f-f10386f7446e\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parsingInstruction = \"\"\"You are provided with an expense report. \n",
|
||||
"Extract employee name, employee id, position, department, date ranges, individual expense items with dates, categories, and amounts.\"\"\"\n",
|
||||
"parsing_instruction = \"\"\"The provided document is a McDonald's receipt. Provide ONLY each line item (item name and price) and the final amount to be paid.\"\"\"\n",
|
||||
"\n",
|
||||
"withInstructionParsing = LlamaParse(\n",
|
||||
" result_type=\"markdown\", parsing_instruction=parsingInstruction\n",
|
||||
").load_data(\"./expense_report_document.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"**Employee Information:**\n",
|
||||
"- **Name:** Dr. Alexandra Chen-Martinez, PhD\n",
|
||||
"- **Employee ID:** QD-2022-1457\n",
|
||||
"- **Position:** Principal Research Scientist\n",
|
||||
"- **Department:** Advanced Research & Development\n",
|
||||
"\n",
|
||||
"**Trip/Expense Period:**\n",
|
||||
"- **Start Date:** November 15, 2024\n",
|
||||
"- **End Date:** December 10, 2024\n",
|
||||
"\n",
|
||||
"**Expense Items:**\n",
|
||||
"1. **Date:** 11/15/2024\n",
|
||||
"- **Category:** Transportation\n",
|
||||
"- **Description:** JFK → NRT Business Class\n",
|
||||
"- **Original Amount:** $4,250.00\n",
|
||||
"- **Currency:** USD\n",
|
||||
"- **USD Amount:** $4,250.00\n",
|
||||
"- **Booking Reference:** QF78956 - Corporate Rate Applied\n",
|
||||
"- **Project Code:** QD-QUANTUM-2024-01\n",
|
||||
"\n",
|
||||
"2. **Date:** 11/16/2024\n",
|
||||
"- **Category:** Accommodation\n",
|
||||
"- **Description:** Hilton Tokyo - 5 nights\n",
|
||||
"- **Original Amount:** ¥225,000\n",
|
||||
"- **Currency:** JPY\n",
|
||||
"- **USD Amount:** $1,530.00\n",
|
||||
"- **Confirmation:** HTK-2024-78956\n",
|
||||
"\n",
|
||||
"**Locations:**\n",
|
||||
"- Tokyo, Japan\n",
|
||||
"- Singapore\n",
|
||||
"- Sydney, Australia\n",
|
||||
"\n",
|
||||
"**Currency Conversion Rates Applied:**\n",
|
||||
"- JPY (¥) → USD: 0.0068 (as of 11/15/2024)\n",
|
||||
"- SGD (S$) → USD: 0.74 (as of 11/28/2024)\n",
|
||||
"- AUD (A$) → USD: 0.65 (as of 12/03/2024)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(withInstructionParsing[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Purchase Order Document \n",
|
||||
"result_with_instruction = await LlamaParse(\n",
|
||||
" parse_mode=\"parse_page_with_agent\",\n",
|
||||
" model=\"openai-gpt-4-1-mini\",\n",
|
||||
" high_res_ocr=True,\n",
|
||||
" outlined_table_extraction=True,\n",
|
||||
" output_tables_as_HTML=True,\n",
|
||||
" # Inject the parsing instruction into the user prompt\n",
|
||||
" user_prompt=parsing_instruction,\n",
|
||||
").aparse(\"./mcdonalds_receipt.png\")\n",
|
||||
"\n",
|
||||
"Here we identify the PO number, vendor details, shipping terms, and an itemized list of products with quantities and unit prices."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"purchase_order_document.png\" alt=\"Alt Text\" width=\"500\">"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id b8cb11c3-7dce-4e6a-94bb-1a4e50e45e55\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vanilaParsing = LlamaParse(result_type=\"markdown\").load_data(\n",
|
||||
" \"./purchase_order_document.pdf\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"# GLOBAL TECH SOLUTIONS, INC.\n",
|
||||
"\n",
|
||||
"# PURCHASE ORDER\n",
|
||||
"\n",
|
||||
"Document Reference: PO-2024-GT-9876/REV.2\n",
|
||||
"\n",
|
||||
"[Original: PO-2024-GT-9876]\n",
|
||||
"\n",
|
||||
"Amendment Date: 12/10/2024\n",
|
||||
"\n",
|
||||
"# VENDOR INFORMATION:\n",
|
||||
"\n",
|
||||
"Quantum Electronics Manufacturing\n",
|
||||
"\n",
|
||||
"DUNS: 78-456-7890\n",
|
||||
"\n",
|
||||
"Tax ID: EU8976543210\n",
|
||||
"\n",
|
||||
"Hoofdorp, Netherlands\n",
|
||||
"\n",
|
||||
"Vendor #: QEM-EU-2024-001\n",
|
||||
"\n",
|
||||
"# SHIP TO:\n",
|
||||
"\n",
|
||||
"Global Tech Solutions, Inc.\n",
|
||||
"\n",
|
||||
"Building 7A, Innovation Park\n",
|
||||
"\n",
|
||||
"2100 Technology Drive\n",
|
||||
"\n",
|
||||
"Austin, TX 78701\n",
|
||||
"\n",
|
||||
"USA\n",
|
||||
"\n",
|
||||
"Attn: Sarah Martinez, Receiving Manager\n",
|
||||
"\n",
|
||||
"Tel: +1 (512) 555-0123\n",
|
||||
"\n",
|
||||
"# PAYMENT TERMS:\n",
|
||||
"\n",
|
||||
"Net 45\n",
|
||||
"\n",
|
||||
"2% discount if paid within 15 days\n",
|
||||
"\n",
|
||||
"# SHIPPING TERMS:\n",
|
||||
"\n",
|
||||
"DDP (Delivered Duty Paid) - Incoterms 2020\n",
|
||||
"\n",
|
||||
"Insurance Required: Yes\n",
|
||||
"\n",
|
||||
"Preferred Carrier: DHL/FedEx\n",
|
||||
"\n",
|
||||
"Required Delivery Date: 01/15/2025\n",
|
||||
"\n",
|
||||
"# SPECIAL INSTRUCTIONS:\n",
|
||||
"\n",
|
||||
"1. All shipments must include Certificate of Conformance\n",
|
||||
"2. ESD-sensitive items must be properly packaged\n",
|
||||
"3. Temperature logging required for items marked with *\n",
|
||||
"4. Partial shipments accepted with prior approval\n",
|
||||
"5. Quote PO number on all correspondence\n",
|
||||
"\n",
|
||||
"# ITEM DETAILS:\n",
|
||||
"\n",
|
||||
"|Line|Part Number|Description|Qty|UOM|Unit Price|Total|\n",
|
||||
"|---|---|---|---|---|---|---|\n",
|
||||
"|1|QE-MCU-5590|Microcontroller Unit|500|EA|$12.50|$6,250.00|\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vanilaParsing[0].text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Started parsing the file under job_id d2731305-984d-4633-8a52-0493748cf10b\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parsingInstruction = \"\"\"You are provided with a purchase order. \n",
|
||||
"Identify the PO number, vendor details, shipping terms, and itemized list of products with quantities and unit prices.\"\"\"\n",
|
||||
"\n",
|
||||
"withInstructionParsing = LlamaParse(\n",
|
||||
" result_type=\"markdown\", parsing_instruction=parsingInstruction\n",
|
||||
").load_data(\"./purchase_order_document.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Here are the details extracted from the purchase order:\n",
|
||||
"\n",
|
||||
"**PO Number:** PO-2024-GT-9876/REV.2\n",
|
||||
"\n",
|
||||
"**Vendor Details:**\n",
|
||||
"- **Vendor Name:** Quantum Electronics Manufacturing\n",
|
||||
"- **DUNS:** 78-456-7890\n",
|
||||
"- **Tax ID:** EU8976543210\n",
|
||||
"- **Address:** Hoofdorp, Netherlands\n",
|
||||
"- **Vendor Number:** QEM-EU-2024-001\n",
|
||||
"- **Contact Person:** Sarah Martinez, Receiving Manager\n",
|
||||
"- **Phone:** +1 (512) 555-0123\n",
|
||||
"\n",
|
||||
"**Shipping Terms:**\n",
|
||||
"- **Terms:** DDP (Delivered Duty Paid) - Incoterms 2020\n",
|
||||
"- **Insurance Required:** Yes\n",
|
||||
"- **Preferred Carrier:** DHL/FedEx\n",
|
||||
"- **Required Delivery Date:** 01/15/2025\n",
|
||||
"\n",
|
||||
"**Itemized List of Products:**\n",
|
||||
"1. **Part Number:** QE-MCU-5590\n",
|
||||
"- **Description:** Microcontroller Unit\n",
|
||||
"- **Quantity:** 500 EA\n",
|
||||
"- **Unit Price:** $12.50\n",
|
||||
"- **Total:** $6,250.00\n",
|
||||
"\n",
|
||||
"**Payment Terms:**\n",
|
||||
"- Net 45\n",
|
||||
"- 2% discount if paid within 15 days\n",
|
||||
"\n",
|
||||
"**Special Instructions:**\n",
|
||||
"1. All shipments must include Certificate of Conformance\n",
|
||||
"2. ESD-sensitive items must be properly packaged\n",
|
||||
"3. Temperature logging required for items marked with *\n",
|
||||
"4. Partial shipments accepted with prior approval\n",
|
||||
"5. Quote PO number on all correspondence\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(withInstructionParsing[0].text)"
|
||||
"print(result_with_instruction.pages[0].md)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llamacloud",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "llamacloud"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/parsing_modes/demo_auto_mode.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb-2025 | N/A | Deprecated |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Many documents can have varying complexity across pages - some pages have text, and other pages have images. The text-only pages only require cheap parsing modes, whereas the image-based pages require more advanced modes. In this notebook we show you how to take advantage of \"auto mode\" in LlamaParse which adaptively parses different pages according to different modes, which lets you get optimal performance at the cheapest cost.\n"
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/parsing_modes/demo_layout_agent_mode_visual_citations.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb-2025 | N/A | Deprecated |\n",
|
||||
"\n",
|
||||
"This cookbook will show you how to leverage LlamaParse's new Layout Agent mode to build a query engine that provides visually grounded citations. But first—what exactly is Layout Agent mode?\n",
|
||||
"\n",
|
||||
"## Layout Agent Mode\n",
|
||||
|
||||
@@ -16,7 +16,21 @@
|
||||
"\n",
|
||||
"We'll use two sample documents:\n",
|
||||
"- Apple 2021 10-K filing (text-heavy financial document)\n",
|
||||
"- GenAI Research Report (visual-rich document with charts and diagrams)"
|
||||
"- GenAI Research Report (visual-rich document with charts and diagrams)\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Aug-18-2025 | 0.6.61 | Maintained |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install \"llama-index>=0.13.0<0.14.0\" llama-cloud-services"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,11 @@
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/parse/test_tesla_impact_report/test_gpt4o.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"Status:\n",
|
||||
"| Last Executed | Version | State |\n",
|
||||
"|---------------|---------|------------|\n",
|
||||
"| Prior to Feb-2025 | N/A | Deprecated |\n",
|
||||
"\n",
|
||||
"GPT-4o is a [fully multimodal model by OpenAI](https://openai.com/index/hello-gpt-4o/) released in May 2024. It matches GPT-4 Turbo performance in text and code, and has significantly improved vision and audio capabilities.\n",
|
||||
"\n",
|
||||
"The expanded vision/audio capabilities mean that it can be used for document parsing, by treating each page as an image and performing document extraction. We support using GPT-4o natively in LlamaParse for document parsing. The notebook below walks you through an example of using GPT-4o over the Tesla impact report.\n",
|
||||
|
||||
Reference in New Issue
Block a user