mirror of
https://github.com/langchain-ai/robo-blogger.git
synced 2026-07-01 20:14:01 -04:00
Add code
This commit is contained in:
@@ -0,0 +1 @@
|
||||
ANTHROPIC_API_KEY=<your_anthropic_api_key>
|
||||
@@ -0,0 +1,55 @@
|
||||
# Robo Blogger
|
||||
|
||||
Robo Blogger is an assistant that transforms voice recordings into polished blog posts, making content creation effortless and efficient.
|
||||
|
||||
## Quickstart
|
||||
|
||||
Set API keys for the LLM of choice (default is Anthropic Claude 3.5 Sonnet):
|
||||
```
|
||||
export ANTHROPIC_API_KEY=<your_anthropic_api_key>
|
||||
```
|
||||
|
||||
Clone the repository and launch the assistant [with the LangGraph server](https://langchain-ai.github.io/langgraph/cloud/reference/cli/#dev):
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
git clone https://github.com/langchain-ai/robo-blogger.git
|
||||
cd robo-blogger
|
||||
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.11 langgraph dev
|
||||
```
|
||||
|
||||
You should see the following output and Studio will open in your browser:
|
||||
|
||||
- 🚀 API: http://127.0.0.1:2024
|
||||
- 🎨 Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
|
||||
- 📚 API Docs: http://127.0.0.1:2024/docs
|
||||
|
||||
Use a dictation app (e.g., [Wispr Flow](https://www.flowvoice.ai/)) to dictate some high-level notes about the blog post you want to write:
|
||||
|
||||
* Save your dictation to a file in the `notes` folder (e.g., `blog_notes.txt`).
|
||||
* For example, with Wispr Flow in Cursor, you can simply hold down the `fn` key (on Mac) and dictate your notes.
|
||||
|
||||
In Studio inputs:
|
||||
* Provide the name of the dictation file (e.g., `blog_notes.txt`) in the `transcribed_notes_file` input field (it defaults to `blog_notes.txt`).
|
||||
* Optionally, provide any links to documentation that you want to use to write the blog post.
|
||||
|
||||
In the `configuration` tab, you can optionally customize the blog post structure.
|
||||
|
||||
## Motivation
|
||||
|
||||
LangChain blog posts typically follow a consistent structure:
|
||||
|
||||
1. High level overview of the topic
|
||||
2. Code documentation and examples
|
||||
3. Structured content walkthrough
|
||||
|
||||
While this structure is clear, getting from initial thoughts to a polished first draft can be challenging. Robo Blogger streamlines this process by requiring only:
|
||||
- A voice recording of your initial thoughts
|
||||
- Optional documentation links
|
||||
- Optional custom blog structure
|
||||
|
||||
The workflow is simple:
|
||||
1. **Voice Capture**: Record your thoughts using any dictation app (e.g., Wispr Flow)
|
||||
2. **Planning**: Claude 3.5 Sonnet converts your dictation, links, and structure into a coherent plan
|
||||
3. **Writing**: Automated generation of each blog section following the plan
|
||||
|
||||
This approach builds on concepts from our previous [Report mAIstro](https://github.com/langchain-ai/report-mAIstro) project.
|
||||
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"dockerfile_lines": [],
|
||||
"graphs": {
|
||||
"robo_blogger": "./src/agent/graph.py:graph"
|
||||
},
|
||||
"python_version": "3.11",
|
||||
"env": "./.env",
|
||||
"dependencies": [
|
||||
"."
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
[project]
|
||||
name = "robo-blogger"
|
||||
version = "0.0.1"
|
||||
description = "Assistant for creating full blog posts from an audio dictation."
|
||||
authors = [
|
||||
{ name = "Lance Martin" }
|
||||
]
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"langgraph>=0.2.55",
|
||||
"langchain-community>=0.3.9",
|
||||
"langchain-anthropic>=0.3.0",
|
||||
"beautifulsoup4>=4.12.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["mypy>=1.11.1", "ruff>=0.6.1"]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=73.0.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["agent"]
|
||||
|
||||
[tool.setuptools.package-dir]
|
||||
"agent" = "src/agent"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"*" = ["py.typed"]
|
||||
|
||||
[tool.ruff]
|
||||
lint.select = [
|
||||
"E", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
"D", # pydocstyle
|
||||
"D401", # First line should be in imperative mood
|
||||
"T201",
|
||||
"UP",
|
||||
]
|
||||
lint.ignore = [
|
||||
"UP006",
|
||||
"UP007",
|
||||
"UP035",
|
||||
"D417",
|
||||
"E501",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"tests/*" = ["D", "UP"]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
from dataclasses import dataclass, fields
|
||||
from typing import Any, Optional
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from dataclasses import dataclass
|
||||
|
||||
DEFAULT_BLOG_STRUCTURE = """The blog post should follow this strict three-part structure:
|
||||
|
||||
1. Introduction (max 1 section)
|
||||
- Start with ### Key Links and include user-provided links
|
||||
- Brief overview of the problem statement
|
||||
- Brief overview of the solution/main topic
|
||||
- Maximum 100 words
|
||||
|
||||
2. Main Body (exactly 2-3 sections)
|
||||
- Each section must:
|
||||
* Cover a distinct aspect of the main topic
|
||||
* Include at least one relevant code snippet
|
||||
* Be 150-200 words
|
||||
- No overlap between sections
|
||||
|
||||
3. Conclusion (max 1 section)
|
||||
- Brief summary of key points
|
||||
- Key Links
|
||||
- Clear call to action
|
||||
- Maximum 150 words"""
|
||||
|
||||
@dataclass(kw_only=True)
class Configuration:
    """User-tunable settings for the blog-writing graph."""

    # Outline the planner is told to follow.
    blog_structure: str = DEFAULT_BLOG_STRUCTURE

    @classmethod
    def from_runnable_config(
        cls, config: Optional[RunnableConfig] = None
    ) -> "Configuration":
        """Build a Configuration from a RunnableConfig.

        For every init field, the upper-cased environment variable of the
        same name is consulted first; the ``configurable`` mapping of
        ``config`` is the fallback. Falsy results are dropped so the
        dataclass default applies.

        Args:
            config: Optional runnable config; may lack a ``configurable`` key.

        Returns:
            A Configuration populated from the environment / config.
        """
        overrides = {}
        if config and "configurable" in config:
            overrides = config["configurable"]

        resolved: dict[str, Any] = {}
        for spec in fields(cls):
            if not spec.init:
                continue
            candidate = os.environ.get(spec.name.upper(), overrides.get(spec.name))
            if candidate:
                resolved[spec.name] = candidate

        return cls(**resolved)
|
||||
@@ -0,0 +1,166 @@
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
from langgraph.constants import Send
|
||||
from langgraph.graph import START, END, StateGraph
|
||||
|
||||
import agent.configuration as configuration
|
||||
from agent.state import Sections, BlogState, BlogStateInput, BlogStateOutput, SectionState
|
||||
from agent.prompts import blog_planner_instructions, main_body_section_writer_instructions, intro_conclusion_instructions
|
||||
from agent.utils import load_and_format_urls, read_dictation_file, format_sections
|
||||
|
||||
# ------------------------------------------------------------
# LLMs
# Single ChatAnthropic client shared by every graph node in this module,
# constructed with temperature=0.
claude_3_5_sonnet = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Graph
|
||||
def generate_blog_plan(state: BlogState, config: RunnableConfig):
    """Plan the blog by asking the LLM for a structured list of sections.

    Args:
        state: Graph state supplying ``transcribed_notes_file`` and ``urls``.
        config: Runnable config from which the blog structure is resolved.

    Returns:
        A state update of the form ``{"sections": [...]}``.
    """
    # The dictation file is looked up under the relative ``notes/`` folder.
    notes_path = f"notes/{state.transcribed_notes_file}"
    dictation = read_dictation_file(notes_path)

    # Reference pages are only fetched when URLs were supplied.
    if state.urls:
        reference_text = load_and_format_urls(state.urls)
    else:
        reference_text = ""

    settings = configuration.Configuration.from_runnable_config(config)

    planner_prompt = blog_planner_instructions.format(
        blog_structure=settings.blog_structure,
        user_instructions=dictation,
        source_urls=reference_text,
    )

    messages = [
        SystemMessage(content=planner_prompt),
        HumanMessage(content="Generate the sections of the blog. Your response must include a 'sections' field containing a list of sections. Each section must have: name, description, and content fields."),
    ]

    # Structured output makes the model reply parse into a ``Sections`` object.
    plan = claude_3_5_sonnet.with_structured_output(Sections).invoke(messages)

    return {"sections": plan.sections}
|
||||
|
||||
def write_section(state: SectionState):
    """Draft one main-body section with the LLM.

    Args:
        state: Per-section state carrying the section to write, the notes
            file name and the reference URLs.

    Returns:
        ``{"completed_sections": [section]}`` where ``section`` is the input
        section object with its ``content`` filled in.
    """
    target = state.section

    # Dictated notes come from the relative ``notes/`` folder.
    dictation = read_dictation_file(f"notes/{state.transcribed_notes_file}")

    # Reference pages are only fetched when URLs were supplied.
    reference_text = load_and_format_urls(state.urls) if state.urls else ""

    prompt = main_body_section_writer_instructions.format(
        section_name=target.name,
        section_topic=target.description,
        user_instructions=dictation,
        source_urls=reference_text,
    )

    reply = claude_3_5_sonnet.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content="Generate a blog section based on the provided information."),
        ]
    )

    # The section object is updated in place and handed back as completed.
    target.content = reply.content
    return {"completed_sections": [target]}
|
||||
|
||||
def write_final_sections(state: SectionState):
    """Write an introduction or conclusion using the finished main body as context.

    Args:
        state: Per-section state carrying the section to write, the formatted
            main body sections and the reference URLs.

    Returns:
        ``{"completed_sections": [section]}`` where ``section`` is the input
        section object with its ``content`` filled in.
    """
    target = state.section

    # The URLs are passed to the prompt as-is (the list itself, not page text).
    prompt = intro_conclusion_instructions.format(
        section_name=target.name,
        section_topic=target.description,
        main_body_sections=state.blog_main_body_sections,
        source_urls=state.urls,
    )

    reply = claude_3_5_sonnet.invoke(
        [
            SystemMessage(content=prompt),
            HumanMessage(content="Generate an intro/conclusion section based on the provided main body sections."),
        ]
    )

    # The section object is updated in place and handed back as completed.
    target.content = reply.content
    return {"completed_sections": [target]}
|
||||
|
||||
def initiate_section_writing(state: BlogState):
    """Fan out one ``write_section`` task per main-body section.

    Args:
        state: Graph state holding the planned ``sections``.

    Returns:
        A list of ``Send`` objects, one for each section whose ``main_body``
        flag is truthy, each targeting the ``write_section`` node.
    """
    dispatches = []
    for planned in state.sections:
        if not planned.main_body:
            continue
        payload = SectionState(
            section=planned,
            transcribed_notes_file=state.transcribed_notes_file,
            urls=state.urls,
            completed_sections=[],  # each task starts with an empty list
        )
        dispatches.append(Send("write_section", payload))
    return dispatches
|
||||
|
||||
def gather_completed_sections(state: BlogState):
    """Render the completed sections as one string for the final-section writers.

    Args:
        state: Graph state holding ``completed_sections``.

    Returns:
        ``{"blog_main_body_sections": <result of format_sections>}``.
    """
    main_body_context = format_sections(state.completed_sections)
    return {"blog_main_body_sections": main_body_context}
|
||||
|
||||
def initiate_final_section_writing(state: BlogState):
    """Fan out one ``write_final_sections`` task per non-main-body section.

    Args:
        state: Graph state holding the planned ``sections`` and the formatted
            main body text.

    Returns:
        A list of ``Send`` objects, one for each section whose ``main_body``
        flag is falsy, each targeting the ``write_final_sections`` node.
    """
    dispatches = []
    for planned in state.sections:
        if planned.main_body:
            continue
        payload = SectionState(
            section=planned,
            blog_main_body_sections=state.blog_main_body_sections,
            urls=state.urls,
            completed_sections=[],  # each task starts with an empty list
        )
        dispatches.append(Send("write_final_sections", payload))
    return dispatches
|
||||
|
||||
def compile_final_blog(state: BlogState):
    """Assemble the finished post in the planned section order.

    Args:
        state: Graph state holding the planned ``sections`` and the
            ``completed_sections`` produced by the writer nodes.

    Returns:
        ``{"final_blog": text}`` where ``text`` is every section's content
        joined by blank lines.

    Raises:
        KeyError: If a planned section name has no completed counterpart.
    """
    # Index the written content by section name.
    written = {}
    for done in state.completed_sections:
        written[done.name] = done.content

    # Walk the plan so the original ordering is kept; sections are updated
    # in place with their written content.
    for planned in state.sections:
        planned.content = written[planned.name]

    pieces = [planned.content for planned in state.sections]
    return {"final_blog": "\n\n".join(pieces)}
|
||||
|
||||
# Add nodes and edges
# The graph runs on BlogState, takes BlogStateInput as input, emits
# BlogStateOutput, and exposes Configuration as its config schema.
builder = StateGraph(BlogState, input=BlogStateInput, output=BlogStateOutput, config_schema=configuration.Configuration)
builder.add_node("generate_blog_plan", generate_blog_plan)
builder.add_node("write_section", write_section)
builder.add_node("compile_final_blog", compile_final_blog)
builder.add_node("gather_completed_sections", gather_completed_sections)
builder.add_node("write_final_sections", write_final_sections)
builder.add_edge(START, "generate_blog_plan")
# After planning, initiate_section_writing returns Send objects that dispatch
# main-body sections to "write_section".
builder.add_conditional_edges("generate_blog_plan", initiate_section_writing, ["write_section"])
builder.add_edge("write_section", "gather_completed_sections")
# After gathering, initiate_final_section_writing dispatches the remaining
# (non-main-body) sections to "write_final_sections".
builder.add_conditional_edges("gather_completed_sections", initiate_final_section_writing, ["write_final_sections"])
builder.add_edge("write_final_sections", "compile_final_blog")
builder.add_edge("compile_final_blog", END)

# Compiled graph object; langgraph.json references it as graph.py:graph.
graph = builder.compile()
|
||||
@@ -0,0 +1,12 @@
|
||||
okay need to write blog post about AI memory systems...
|
||||
|
||||
first, we need to explain what memory actually is so lets cover the types of memory
|
||||
semantic - like facts and stuff
|
||||
episodic - past experiences
|
||||
procedural - like instructions and rules
|
||||
|
||||
second, we want to cover short term vs long term memory differences
|
||||
|
||||
third, we want to cover memory management techniques
|
||||
talk about conversation history management
|
||||
mention that background vs real-time memory writing thing... pros and cons of each
|
||||
@@ -0,0 +1,146 @@
|
||||
blog_planner_instructions="""You are an expert technical writer, helping to plan a blog post.
|
||||
|
||||
Your goal is to generate a CONCISE outline with exactly 4-5 total sections (including intro and conclusion).
|
||||
|
||||
The blog must strictly follow this structure:
|
||||
|
||||
{blog_structure}
|
||||
|
||||
Rules for section planning:
|
||||
1. Generate exactly ONE introduction section
|
||||
2. Generate 2-3 main body sections that:
|
||||
- Are clearly distinct from each other
|
||||
- Cover different aspects of the topic
|
||||
- Will include code snippets
|
||||
3. Generate exactly ONE conclusion section
|
||||
4. Avoid any redundancy between sections
|
||||
|
||||
Use this information to plan the sections:
|
||||
|
||||
User Instructions:
|
||||
{user_instructions}
|
||||
|
||||
Source URLs (if provided):
|
||||
{source_urls}
|
||||
|
||||
For each section, provide:
|
||||
- Name - Clear, descriptive section name
|
||||
- Description - Give an overview of the specific topics to be covered in this section of the blog
|
||||
- Content - Leave blank for now
|
||||
- Main Body - Whether this is a main body section
|
||||
|
||||
Final check:
|
||||
1. Confirm that the sections are non-overlapping in topic and non-redundant
|
||||
2. Confirm that each Section Description has a clearly stated scope that does not conflict with other sections"""
|
||||
|
||||
# Section writer instructions
# Prompt template for one main-body section. Placeholders filled via
# str.format(): {section_name}, {section_topic}, {user_instructions},
# {source_urls}.
# The last checklist item restates the "standalone summary" structure rule;
# it previously asked for a transition to the next section, which
# contradicted that rule.
main_body_section_writer_instructions = """You are an expert technical writer crafting one section of a blog post.

CONTEXT:
Section Name: {section_name}
Section Topic: {section_topic}
User Instructions: {user_instructions}
Reference Material: {source_urls}

WRITING GUIDELINES:

1. Structure:
- Start with a level-2 heading (##)
- Break content into 2-3 clear subsections
- Each paragraph should be 2-3 sentences maximum
- Include exactly one code example with explanation
- End with a brief standalone summary of the section's key points (no references to other sections)

2. Style Requirements:
- Technical and precise language
- Active voice
- Zero marketing language
- Concrete examples over abstract concepts
- Clear topic sentences
- Markdown formatting

3. Code Example Requirements:
- Must be practical and executable
- Include brief comments explaining key parts
- Maximum 10 lines of code
- Must directly relate to section topic

4. Length and Format:
- Strict 150-200 words (excluding code)
- Use markdown formatting:
* ## for section heading
* ``` for code blocks
* ** for emphasis when needed
* - for bullet points if necessary

QUALITY CHECKLIST:
[ ] Meets word count (150-200 words)
[ ] Contains one clear code example
[ ] Uses proper markdown formatting
[ ] Maintains technical focus
[ ] Connects logically to section topic
[ ] Free of marketing language
[ ] Ends with a standalone summary (no references to other sections)

Generate the section content now, focusing on clarity and technical accuracy."""
|
||||
|
||||
# Intro/conclusion instructions
# Prompt template shared by the introduction and conclusion writers.
# Placeholders filled via str.format(): {section_name}, {section_topic},
# {main_body_sections}, {source_urls}.
intro_conclusion_instructions = """You are an expert technical writer crafting the introduction or conclusion of a blog post.

Name for this section:
{section_name}

Topic for this section:
{section_topic}

Guidelines for writing:

1. Length and Style:
- Technical focus with zero jargon
- Active voice only
- Each paragraph 2-3 sentences maximum
- No marketing language or buzzwords
- Must be self-contained (no "In this blog..." or "As we discussed...")

2. Section-Specific Requirements:

FOR INTRODUCTION:
- Format: # Title (must be attention-grabbing but technical)
- Structure:
* First paragraph: Hook + problem statement
* Second paragraph: Solution overview
* Final paragraph: What reader will learn
- Word limit: Strict 50-100 words
- Required elements:
* ### Key Links section at bottom
* One concrete example or use case
- Prohibited elements:
* No lists, tables, or code
* No future tense about what "will be covered"

FOR CONCLUSION:
- Format: ## Summary and Next Steps
- Structure:
* First paragraph: Key takeaways
* Second paragraph: Practical applications
* Final paragraph: Call to action (focused on technical implementation)
- Word limit: Strict 100-150 words
- Choose exactly ONE:
* Markdown table comparing key concepts
* Bulleted list of implementation steps
* Code snippet showing complete minimal example

3. Context:
Main body sections:
{main_body_sections}

Reference URLs:
{source_urls}

4. Quality Requirements:
[ ] Meets exact word count
[ ] Uses proper markdown formatting
[ ] Contains no marketing language
[ ] Includes required structural elements
[ ] Links directly to main body content
[ ] Maintains technical focus throughout"""
|
||||
@@ -0,0 +1,56 @@
|
||||
import operator
|
||||
from dataclasses import dataclass, field
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Annotated, List
|
||||
|
||||
# Pydantic schema for one planned blog section.
# NOTE(review): documented with comments rather than a class docstring,
# because pydantic surfaces class docstrings in the generated schema — confirm
# the effect on structured output before converting this to a docstring.
class Section(BaseModel):
    # Section title; compile_final_blog uses it as the lookup key.
    name: str = Field(
        description="Name for this section of the report.",
    )
    # Scope of the section; passed to the writer prompts as the section topic.
    description: str = Field(
        description="Brief overview of the main topics and concepts to be covered in this section.",
    )
    # Written text; overwritten by the writer nodes with the LLM reply.
    content: str = Field(
        description="The content of the section."
    )
    # Routing flag: truthy sections go to write_section, the rest to
    # write_final_sections.
    main_body: bool = Field(
        description="Whether this is a main body section."
    )
|
||||
|
||||
# Wrapper model holding the planned sections as a list.
# NOTE(review): comments are used instead of a class docstring, because
# pydantic surfaces class docstrings in the generated schema.
class Sections(BaseModel):
    # Planned sections of the post.
    sections: List[Section] = Field(
        description="Sections of the report.",
    )
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class BlogState:
|
||||
transcribed_notes_file: str = field(default=None) # Blog notes
|
||||
urls: List[str] = field(default_factory=list) # List of urls
|
||||
sections: list[Section] = field(default_factory=list)
|
||||
completed_sections: Annotated[list, operator.add] # Send() API key
|
||||
blog_main_body_sections: str = field(default=None) # Main body sections from research
|
||||
final_blog: str = field(default=None) # Final report
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class BlogStateInput:
|
||||
transcribed_notes_file: str = field(default="blog_notes.txt") # Blog notes
|
||||
urls: List[str] = field(default_factory=list) # List of urls
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class BlogStateOutput:
|
||||
final_blog: str = field(default=None) # Final report
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class SectionState:
|
||||
section: Section # Report section
|
||||
transcribed_notes_file: str = field(default=None) # Blog notes
|
||||
urls: List[str] = field(default_factory=list) # List of urls ]
|
||||
blog_main_body_sections: str = field(default=None) # Main body sections from research
|
||||
completed_sections: list[Section] = field(default_factory=list) # Final key we duplicate in outer state for Send() API
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class SectionOutputState:
|
||||
completed_sections: list[Section] = field(default_factory=list) # Final key we duplicate in outer state for Send() API
|
||||
@@ -0,0 +1,70 @@
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
from agent.state import Section
|
||||
|
||||
def load_and_format_urls(url_list):
    """Fetch web pages and render them as one delimited text block.

    Args:
        url_list (str or list): A single URL or a list of URLs to load.

    Returns:
        str: One entry per loaded document, entries separated by a blank
            line. Each entry is wrapped in '---' lines and contains the
            title, source URL, description ('N/A' when the metadata key is
            missing) and the stripped page content.
    """
    documents = WebBaseLoader(url_list).load()

    rendered = []
    for page in documents:
        title = page.metadata.get('title', 'N/A')
        source = page.metadata.get('source', 'N/A')
        description = page.metadata.get('description', 'N/A')
        header = f"Title: {title}\nSource: {source}\nDescription: {description}\n"

        # Only leading/trailing whitespace is trimmed from the page text.
        body = page.page_content.strip()

        rendered.append(f"---\n{header}\nContent:\n{body}\n---")

    return "\n\n".join(rendered)
|
||||
|
||||
def read_dictation_file(file_path: str) -> str:
    """Read the text of an audio-to-text dictation file.

    Reading is best-effort: failures are logged and an empty string is
    returned, so the caller can continue without notes.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The file's contents, or "" if the file is missing, unreadable, or
        not valid UTF-8.
    """
    # Local import so this function does not rely on a module-level import.
    import logging

    logger = logging.getLogger(__name__)

    try:
        with open(file_path, encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        logger.warning("File not found at %s", file_path)
    except (OSError, ValueError) as exc:
        # OSError: permissions, directories, I/O errors.
        # ValueError: covers UnicodeDecodeError for non-UTF-8 content.
        logger.error("Error reading file %s: %s", file_path, exc)
    return ""
|
||||
|
||||
def format_sections(sections: list[Section]) -> str:
    """Format a list of sections into a single string.

    Args:
        sections: Section-like objects exposing ``name``, ``description``,
            ``main_body`` and ``content``.

    Returns:
        The concatenation of one text block per section (numbered from 1).
        Sections with empty content show '[Not yet written]'. An empty list
        yields "".
    """
    divider = "=" * 60
    blocks = []
    for idx, section in enumerate(sections, 1):
        body = section.content if section.content else "[Not yet written]"
        blocks.append(
            f"\n{divider}\n"
            f"Section {idx}: {section.name}\n"
            f"{divider}\n"
            "Description:\n"
            f"{section.description}\n"
            "Main body:\n"
            f"{section.main_body}\n"
            "\n"
            "Content:\n"
            f"{body}\n"
            "\n"
        )
    # The result must be returned explicitly; without this the function
    # returns None and callers get no context string.
    return "".join(blocks)
|
||||
Reference in New Issue
Block a user