mirror of
https://github.com/run-llama/vibe-llama.git
synced 2026-07-01 21:54:01 -04:00
update skills docs (#31)
* update skills docs * chore: vbump to align with vibe-llama-core --------- Co-authored-by: Clelia (Astra) Bertelli <clelia@runllama.ai>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: PDF Processing
|
||||
description: Extract text from PDFs in order to read and understand their content. Requires llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
description: Invoke this skill BEFORE implementing any text extraction/parsing logic to learn how to use LlamaParse to process any document accurately. Requires llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
---
|
||||
|
||||
# PDF Processing
|
||||
|
||||
@@ -32,7 +32,6 @@ extract_config = ExtractConfig(
|
||||
extraction_target=ExtractTarget.PER_DOC, # PER_DOC, PER_PAGE
|
||||
system_prompt="<Insert relevant context for extraction>", # set system prompt - can leave blank
|
||||
# Advanced options
|
||||
chunk_mode=ChunkMode.PAGE, # PAGE, SECTION
|
||||
high_resolution_mode=True, # Enable for better OCR
|
||||
invalidate_cache=False, # Set to True to bypass cache
|
||||
# Extensions
|
||||
@@ -43,7 +42,7 @@ extract_config = ExtractConfig(
|
||||
|
||||
# Extract data directly from document - no agent needed!
|
||||
result = extractor.extract(Resume, config, "resume.pdf")
|
||||
print(result.data)
|
||||
print(Resume.model_validate(result.data))
|
||||
```
|
||||
|
||||
### Supported File Types
|
||||
@@ -207,7 +206,7 @@ be sufficient for a wide variety of use-cases.
|
||||
Configure how extraction is performed using `ExtractConfig`. The schema is the most important part, but several configuration options can significantly impact the extraction process.
|
||||
|
||||
```python
|
||||
from llama_cloud import ExtractConfig, ExtractMode, ChunkMode, ExtractTarget
|
||||
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
||||
|
||||
# Basic configuration
|
||||
config = ExtractConfig(
|
||||
@@ -220,7 +219,6 @@ config = ExtractConfig(
|
||||
# Advanced configuration
|
||||
advanced_config = ExtractConfig(
|
||||
extraction_mode=ExtractMode.MULTIMODAL,
|
||||
chunk_mode=ChunkMode.PAGE, # PAGE, SECTION
|
||||
high_resolution_mode=True, # Better OCR accuracy
|
||||
invalidate_cache=False, # Set to True to bypass cached results
|
||||
cite_sources=True, # Enable source citations
|
||||
@@ -247,7 +245,6 @@ advanced_config = ExtractConfig(
|
||||
|
||||
- `system_prompt`: Additional system-level instructions
|
||||
- `page_range`: Specific pages to extract (e.g., "1,3,5-7,9")
|
||||
- `chunk_mode`: Document splitting strategy (`PAGE` or `SECTION`)
|
||||
- `high_resolution_mode`: Better OCR for small text (slower processing)
|
||||
|
||||
**Extensions** (return additional metadata):
|
||||
@@ -290,7 +287,7 @@ agent = extractor.create_agent(
|
||||
|
||||
# Use the agent
|
||||
result = agent.extract("resume.pdf")
|
||||
print(result.data)
|
||||
print(Resume.model_validate(result.data))
|
||||
```
|
||||
|
||||
### Agent Batch Processing
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Extract structured data from unstructured files (PDF, PPTX, DOCX...)
|
||||
description: Extract structured data from PDFs and other unstructured file types in order to get the most relevant information from them. Requires llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
description: Invoke this skill BEFORE implementing any structured data extraction from documents to learn the correct llama_cloud_services API usage. Required reading before writing extraction code. Requires llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
---
|
||||
|
||||
# Structured Data Extraction
|
||||
@@ -19,6 +19,8 @@ class Resume(BaseModel):
|
||||
skills: list[str] = Field(description="Technical skills and technologies")
|
||||
```
|
||||
|
||||
**NOTE:** Use basic types when possible. Avoid nested dictionaries. Lists are ok.
|
||||
|
||||
- Create a LlamaExtract instance:
|
||||
|
||||
```python
|
||||
@@ -28,6 +30,8 @@ from llama_cloud_services import LlamaExtract
|
||||
extractor = LlamaExtract(
|
||||
show_progress=True,
|
||||
check_interval=5,
|
||||
# Optional API key, else reads from env
|
||||
# api_key=os.environ.get("LLAMA_CLOUD_API_KEY"),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -43,7 +47,6 @@ extract_config = ExtractConfig(
|
||||
extraction_target=ExtractTarget.PER_DOC, # PER_DOC, PER_PAGE
|
||||
system_prompt="<Insert relevant context for extraction>", # set system prompt - can leave blank
|
||||
# Advanced options
|
||||
chunk_mode=ChunkMode.PAGE, # PAGE, SECTION
|
||||
high_resolution_mode=True, # Enable for better OCR
|
||||
invalidate_cache=False, # Set to True to bypass cache
|
||||
# Extensions
|
||||
@@ -57,7 +60,9 @@ extract_config = ExtractConfig(
|
||||
|
||||
```python
|
||||
result = extractor.extract(Resume, config, "resume.pdf")
|
||||
print(result.data)
|
||||
|
||||
# result.data has our model as a python dict
|
||||
print(Resume.model_validate(result.data))
|
||||
```
|
||||
|
||||
For more detailed code implementations, see [REFERENCE.md](REFERENCE.md).
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Classify files according to specific rules
|
||||
description: Perform classification of files (based on their content) into pre-defined categories (e.g. 'invoice', 'resume', 'email'). Requires the llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
description: Invoke this skill BEFORE implementing any text/document classification task to learn the correct llama_cloud_services API usage. Required reading before writing classification code. Requires the llama_cloud_services package and LLAMA_CLOUD_API_KEY as an environment variable.
|
||||
---
|
||||
|
||||
# Texts and Files Classification
|
||||
@@ -35,6 +35,7 @@ rules = [
|
||||
from llama_cloud_services.beta.classifier.client import ClassifyClient
|
||||
|
||||
# Initialize client
|
||||
# Note: the beta client differs in usage slightly compared to other clients in llama-cloud-services
|
||||
classifier = ClassifyClient.from_api_key(api_key)
|
||||
|
||||
# Classify a PDF directly (parsing happens implicitly)
|
||||
|
||||
+2
-2
@@ -13,12 +13,12 @@ dev = [
|
||||
|
||||
[project]
|
||||
name = "vibe-llama"
|
||||
version = "0.4.7"
|
||||
version = "0.4.7.post1"
|
||||
description = "vibe-llama is a set of tools that are designed to help developers build working and reliable applications with LlamaIndex, LlamaCloud Services and llama-index-workflows."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"vibe-llama-core>=0.1.0",
|
||||
"vibe-llama-core>=0.1.1.post1",
|
||||
"bm25s>=0.2.13",
|
||||
"fastmcp>=2.11.3",
|
||||
"llama-cloud-services>=0.6.62",
|
||||
|
||||
Reference in New Issue
Block a user