This commit is contained in:
Lance Martin
2024-12-13 16:29:52 -08:00
commit 71dab4ee98
11 changed files with 620 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
ANTHROPIC_API_KEY=<your_anthropic_api_key>
+55
View File
@@ -0,0 +1,55 @@
# Robo Blogger
Robo Blogger is an assistant that transforms voice recordings into polished blog posts, making content creation effortless and efficient.
## Quickstart
Set API keys for the LLM of choice (default is Anthropic Claude 3.5 Sonnet):
```
export ANTHROPIC_API_KEY=<your_anthropic_api_key>
```
Clone the repository and launch the assistant [with the LangGraph server](https://langchain-ai.github.io/langgraph/cloud/reference/cli/#dev):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
git clone https://github.com/langchain-ai/robo_blogger.git
cd robo_blogger
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.11 langgraph dev
```
You should see the following output and Studio will open in your browser:
- 🚀 API: http://127.0.0.1:2024
- 🎨 Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- 📚 API Docs: http://127.0.0.1:2024/docs
Use a dictation app (e.g., [Flowwise](https://www.flowvoice.ai/)) to dictate some high level notes about the blog post you want to write:
* Save your dictation to a file in the `notes` folder (e.g., `blog_notes.txt`).
* For example, with Flowwise in Cursor, you can simply hold down the `fn` key (on Mac) and dictate your notes.
In Studio inputs:
* Provide the name of the dictation file (e.g., `blog_notes.txt`) as the `transcribed_notes_file` input.
* Optionally, provide links to any documentation you want the blog post to draw on as the `urls` input.
In the `configuration` tab, you can optionally customize the blog post structure.
## Motivation
LangChain blog posts typically follow a consistent structure:
1. High level overview of the topic
2. Code documentation and examples
3. Structured content walkthrough
While this structure is clear, getting from initial thoughts to a polished first draft can be challenging. Robo Blogger streamlines this process by requiring only:
- A voice recording of your initial thoughts
- Optional documentation links
- Optional custom blog structure
The workflow is simple:
1. **Voice Capture**: Record your thoughts using any dictation app (e.g., Flowwise)
2. **Planning**: Claude 3.5 Sonnet converts your dictation, links, and structure into a coherent plan
3. **Writing**: Automated generation of each blog section following the plan
This approach builds on concepts from our previous [Report mAIstro](https://github.com/langchain-ai/report-mAIstro) project.
+11
View File
@@ -0,0 +1,11 @@
{
"dockerfile_lines": [],
"graphs": {
"robo_blogger": "./src/agent/graph.py:graph"
},
"python_version": "3.11",
"env": "./.env",
"dependencies": [
"."
]
}
+56
View File
@@ -0,0 +1,56 @@
[project]
name = "robo-blogger"
version = "0.0.1"
description = "Assistant for creating full blog posts from an audio dictation."
authors = [
    { name = "Lance Martin" }
]
readme = "README.md"
license = { text = "MIT" }
# The agent's state and configuration classes use `@dataclass(kw_only=True)`,
# which was added in Python 3.10, so 3.9 cannot import the package.
requires-python = ">=3.10"
dependencies = [
    "langgraph>=0.2.55",
    "langchain-community>=0.3.9",
    "langchain-anthropic>=0.3.0",
    "beautifulsoup4>=4.12.2",
]
[project.optional-dependencies]
dev = ["mypy>=1.11.1", "ruff>=0.6.1"]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools]
packages = ["agent"]
[tool.setuptools.package-dir]
"agent" = "src/agent"
[tool.setuptools.package-data]
"*" = ["py.typed"]
[tool.ruff]
lint.select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"D", # pydocstyle
"D401", # First line should be in imperative mood
"T201",
"UP",
]
lint.ignore = [
"UP006",
"UP007",
"UP035",
"D417",
"E501",
]
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["D", "UP"]
[tool.ruff.lint.pydocstyle]
convention = "google"
View File
+47
View File
@@ -0,0 +1,47 @@
import os
from dataclasses import dataclass, fields
from typing import Any, Optional
from langchain_core.runnables import RunnableConfig
from dataclasses import dataclass
# Default value of `Configuration.blog_structure`: the three-part outline
# (introduction, main body, conclusion) the blog post is asked to follow.
DEFAULT_BLOG_STRUCTURE = """The blog post should follow this strict three-part structure:
1. Introduction (max 1 section)
- Start with ### Key Links and include user-provided links
- Brief overview of the problem statement
- Brief overview of the solution/main topic
- Maximum 100 words
2. Main Body (exactly 2-3 sections)
- Each section must:
* Cover a distinct aspect of the main topic
* Include at least one relevant code snippet
* Be 150-200 words
- No overlap between sections
3. Conclusion (max 1 section)
- Brief summary of key points
- Key Links
- Clear call to action
- Maximum 150 words"""
@dataclass(kw_only=True)
class Configuration:
    """Hold the user-configurable settings of the blog-writing graph."""

    blog_structure: str = DEFAULT_BLOG_STRUCTURE

    @classmethod
    def from_runnable_config(
        cls, config: Optional[RunnableConfig] = None
    ) -> "Configuration":
        """Build a Configuration from a RunnableConfig.

        For every init field, an environment variable named after the
        upper-cased field name is consulted first; the ``configurable``
        mapping of ``config`` is the fallback. Falsy results are dropped so
        the dataclass default applies.

        Args:
            config: Optional runnable config; its ``"configurable"`` entry is
                used when present.

        Returns:
            A Configuration populated from the resolved values.
        """
        configurable = {}
        if config and "configurable" in config:
            configurable = config["configurable"]

        resolved: dict[str, Any] = {}
        for spec in fields(cls):
            if not spec.init:
                continue
            candidate = os.environ.get(spec.name.upper(), configurable.get(spec.name))
            # Skip empty values so the field keeps its declared default
            if candidate:
                resolved[spec.name] = candidate
        return cls(**resolved)
+166
View File
@@ -0,0 +1,166 @@
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from langgraph.constants import Send
from langgraph.graph import START, END, StateGraph
import agent.configuration as configuration
from agent.state import Sections, BlogState, BlogStateInput, BlogStateOutput, SectionState
from agent.prompts import blog_planner_instructions, main_body_section_writer_instructions, intro_conclusion_instructions
from agent.utils import load_and_format_urls, read_dictation_file, format_sections
# ------------------------------------------------------------
# LLMs
# Module-level chat model instance used by the planning and writing nodes in
# this module; temperature is set to 0.
claude_3_5_sonnet = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0)
# ------------------------------------------------------------
# Graph
def generate_blog_plan(state: BlogState, config: RunnableConfig):
    """Plan the blog post as a list of sections.

    Reads the dictated notes from ``notes/<state.transcribed_notes_file>``,
    loads ``state.urls`` when any are present, and prompts the model (with
    structured output bound to ``Sections``) using the configured blog
    structure.

    Args:
        state: Graph state supplying ``transcribed_notes_file`` and ``urls``.
        config: Runnable config from which ``Configuration`` is resolved.

    Returns:
        A state update of the form ``{"sections": [...]}``.
    """
    # Dictated notes are looked up inside the notes/ folder
    notes_path = f"notes/{state.transcribed_notes_file}"
    user_instructions = read_dictation_file(notes_path)

    # Only call the loader when URLs were actually supplied
    source_urls = load_and_format_urls(state.urls) if state.urls else ""

    # The blog structure comes from the run configuration
    blog_structure = configuration.Configuration.from_runnable_config(config).blog_structure

    planner_prompt = blog_planner_instructions.format(
        blog_structure=blog_structure,
        user_instructions=user_instructions,
        source_urls=source_urls,
    )
    messages = [
        SystemMessage(content=planner_prompt),
        HumanMessage(content="Generate the sections of the blog. Your response must include a 'sections' field containing a list of sections. Each section must have: name, description, and content fields."),
    ]

    # Ask for a Sections object rather than free text
    planned = claude_3_5_sonnet.with_structured_output(Sections).invoke(messages)
    return {"sections": planned.sections}
def write_section(state: SectionState):
    """Write one main-body section of the blog.

    Re-reads the dictated notes, loads ``state.urls`` when any are present,
    prompts the model with the section's name and description, and stores the
    reply on ``state.section.content``.

    Args:
        state: Per-section state carrying the section to write, the notes
            file name and the reference URLs.

    Returns:
        A state update of the form ``{"completed_sections": [section]}``.
    """
    target = state.section

    # Gather the context the writer prompt needs
    user_instructions = read_dictation_file(f"notes/{state.transcribed_notes_file}")
    source_urls = load_and_format_urls(state.urls) if state.urls else ""

    writer_prompt = main_body_section_writer_instructions.format(
        section_name=target.name,
        section_topic=target.description,
        user_instructions=user_instructions,
        source_urls=source_urls,
    )
    reply = claude_3_5_sonnet.invoke(
        [
            SystemMessage(content=writer_prompt),
            HumanMessage(content="Generate a blog section based on the provided information."),
        ]
    )

    # The section object itself carries the generated text forward
    target.content = reply.content
    return {"completed_sections": [target]}
def write_final_sections(state: SectionState):
    """Write an introduction or conclusion section of the blog.

    Uses the already formatted main-body sections and the raw URL list from
    the state as prompt context, then stores the model reply on
    ``state.section.content``.

    Args:
        state: Per-section state carrying the section to write,
            ``blog_main_body_sections`` and ``urls``.

    Returns:
        A state update of the form ``{"completed_sections": [section]}``.
    """
    target = state.section

    final_prompt = intro_conclusion_instructions.format(
        section_name=target.name,
        section_topic=target.description,
        main_body_sections=state.blog_main_body_sections,
        source_urls=state.urls,
    )
    reply = claude_3_5_sonnet.invoke(
        [
            SystemMessage(content=final_prompt),
            HumanMessage(content="Generate an intro/conclusion section based on the provided main body sections."),
        ]
    )

    # The section object itself carries the generated text forward
    target.content = reply.content
    return {"completed_sections": [target]}
def initiate_section_writing(state: BlogState):
    """Dispatch a ``write_section`` task for every main-body section.

    This is the "map" step: each planned section flagged ``main_body`` is
    wrapped in its own ``SectionState`` and sent to the ``write_section`` node
    through the Send() API.

    Args:
        state: Graph state holding the planned sections, the notes file name
            and the reference URLs.

    Returns:
        A list of ``Send`` objects, one per main-body section.
    """
    dispatches = []
    for planned in state.sections:
        if not planned.main_body:
            continue
        payload = SectionState(
            section=planned,
            transcribed_notes_file=state.transcribed_notes_file,
            urls=state.urls,
            completed_sections=[],  # Initialize with empty list
        )
        dispatches.append(Send("write_section", payload))
    return dispatches
def gather_completed_sections(state: BlogState):
    """Format the completed main-body sections into one context string.

    Args:
        state: Graph state whose ``completed_sections`` have been written.

    Returns:
        A state update of the form ``{"blog_main_body_sections": <str>}``,
        later used as context when writing the remaining sections.
    """
    return {"blog_main_body_sections": format_sections(state.completed_sections)}
def initiate_final_section_writing(state: BlogState):
    """Dispatch a ``write_final_sections`` task for every non-main-body section.

    This is the second "map" step: each planned section that is not flagged
    ``main_body`` is wrapped in its own ``SectionState`` together with the
    formatted main-body text and sent to the ``write_final_sections`` node
    through the Send() API.

    Args:
        state: Graph state holding the planned sections, the formatted
            main-body sections and the reference URLs.

    Returns:
        A list of ``Send`` objects, one per remaining section.
    """
    dispatches = []
    for planned in state.sections:
        if planned.main_body:
            continue
        payload = SectionState(
            section=planned,
            blog_main_body_sections=state.blog_main_body_sections,
            urls=state.urls,
            completed_sections=[],  # Initialize with empty list
        )
        dispatches.append(Send("write_final_sections", payload))
    return dispatches
def compile_final_blog(state: BlogState):
    """Assemble the written sections into the final blog text.

    Content is looked up by section name among ``state.completed_sections``
    and copied onto the planned sections, so the output follows the planned
    order rather than the order in which sections finished.

    Args:
        state: Graph state with the planned ``sections`` and the written
            ``completed_sections``.

    Returns:
        A state update of the form ``{"final_blog": <str>}`` with the section
        contents joined by blank lines.

    Raises:
        KeyError: If a planned section has no completed counterpart.
    """
    content_by_name = {}
    for done in state.completed_sections:
        content_by_name[done.name] = done.content

    # Walk the plan so the original ordering is preserved
    ordered_parts = []
    for planned in state.sections:
        planned.content = content_by_name[planned.name]
        ordered_parts.append(planned.content)

    return {"final_blog": "\n\n".join(ordered_parts)}
# Add nodes and edges
# BlogState is the graph state; BlogStateInput / BlogStateOutput are passed as
# the input and output schemas and Configuration as the config schema.
builder = StateGraph(BlogState, input=BlogStateInput, output=BlogStateOutput, config_schema=configuration.Configuration)
# Register one node per step
builder.add_node("generate_blog_plan", generate_blog_plan)
builder.add_node("write_section", write_section)
builder.add_node("compile_final_blog", compile_final_blog)
builder.add_node("gather_completed_sections", gather_completed_sections)
builder.add_node("write_final_sections", write_final_sections)
# Planning runs first; initiate_section_writing then routes to write_section
builder.add_edge(START, "generate_blog_plan")
builder.add_conditional_edges("generate_blog_plan", initiate_section_writing, ["write_section"])
# Written sections are gathered, then initiate_final_section_writing routes
# to write_final_sections
builder.add_edge("write_section", "gather_completed_sections")
builder.add_conditional_edges("gather_completed_sections", initiate_final_section_writing, ["write_final_sections"])
# Compile the final blog and finish
builder.add_edge("write_final_sections", "compile_final_blog")
builder.add_edge("compile_final_blog", END)
graph = builder.compile()
+12
View File
@@ -0,0 +1,12 @@
okay need to write blog post about AI memory systems...
first, we need to explain what memory actually is so lets cover the types of memory
semantic - like facts and stuff
episodic - past experiences
procedural - like instructions and rules
second, we want to cover short term vs long term memory differences
third, we want to cover memory management techniques
talk about conversation history management
mention that background vs real-time memory writing thing... pros and cons of each
+146
View File
@@ -0,0 +1,146 @@
# Planner prompt template. Format fields: {blog_structure}, {user_instructions},
# {source_urls}.
blog_planner_instructions="""You are an expert technical writer, helping to plan a blog post.
Your goal is to generate a CONCISE outline with exactly 4-5 total sections (including intro and conclusion).
The blog must strictly follow this structure:
{blog_structure}
Rules for section planning:
1. Generate exactly ONE introduction section
2. Generate 2-3 main body sections that:
- Are clearly distinct from each other
- Cover different aspects of the topic
- Will include code snippets
3. Generate exactly ONE conclusion section
4. Avoid any redundancy between sections
Use this information to plan the sections:
User Instructions:
{user_instructions}
Source URLs (if provided):
{source_urls}
For each section, provide:
- Name - Clear, descriptive section name
- Description - Give an overview of the specific topics to be covered in this section of the blog
- Content - Leave blank for now
- Main Body - Whether this is a main body section
Final check:
1. Confirm that the sections are non-overlapping in topic and non-redundant
2. Confirm that each Section Description has a clearly stated scope that does not conflict with other sections"""
# Section writer instructions
# Prompt template for a single main-body section. Format fields:
# {section_name}, {section_topic}, {user_instructions}, {source_urls}.
# The checklist asks for a standalone summary rather than a transition to the
# next section, matching guideline 1: the prompt carries no information about
# neighbouring sections.
main_body_section_writer_instructions = """You are an expert technical writer crafting one section of a blog post.
CONTEXT:
Section Name: {section_name}
Section Topic: {section_topic}
User Instructions: {user_instructions}
Reference Material: {source_urls}
WRITING GUIDELINES:
1. Structure:
- Start with a level-2 heading (##)
- Break content into 2-3 clear subsections
- Each paragraph should be 2-3 sentences maximum
- Include exactly one code example with explanation
- End with a brief standalone summary of the section's key points (no references to other sections)
2. Style Requirements:
- Technical and precise language
- Active voice
- Zero marketing language
- Concrete examples over abstract concepts
- Clear topic sentences
- Markdown formatting
3. Code Example Requirements:
- Must be practical and executable
- Include brief comments explaining key parts
- Maximum 10 lines of code
- Must directly relate to section topic
4. Length and Format:
- Strict 150-200 words (excluding code)
- Use markdown formatting:
* ## for section heading
* ``` for code blocks
* ** for emphasis when needed
* - for bullet points if necessary
QUALITY CHECKLIST:
[ ] Meets word count (150-200 words)
[ ] Contains one clear code example
[ ] Uses proper markdown formatting
[ ] Maintains technical focus
[ ] Connects logically to section topic
[ ] Free of marketing language
[ ] Ends with a standalone summary (no references to other sections)
Generate the section content now, focusing on clarity and technical accuracy."""
# Intro/conclusion instructions
# Prompt template shared by the introduction and conclusion sections. Format
# fields: {section_name}, {section_topic}, {main_body_sections}, {source_urls}.
intro_conclusion_instructions = """You are an expert technical writer crafting the introduction or conclusion of a blog post.
Name for this section:
{section_name}
Topic for this section:
{section_topic}
Guidelines for writing:
1. Length and Style:
- Technical focus with zero jargon
- Active voice only
- Each paragraph 2-3 sentences maximum
- No marketing language or buzzwords
- Must be self-contained (no "In this blog..." or "As we discussed...")
2. Section-Specific Requirements:
FOR INTRODUCTION:
- Format: # Title (must be attention-grabbing but technical)
- Structure:
* First paragraph: Hook + problem statement
* Second paragraph: Solution overview
* Final paragraph: What reader will learn
- Word limit: Strict 50-100 words
- Required elements:
* ### Key Links section at bottom
* One concrete example or use case
- Prohibited elements:
* No lists, tables, or code
* No future tense about what "will be covered"
FOR CONCLUSION:
- Format: ## Summary and Next Steps
- Structure:
* First paragraph: Key takeaways
* Second paragraph: Practical applications
* Final paragraph: Call to action (focused on technical implementation)
- Word limit: Strict 100-150 words
- Choose exactly ONE:
* Markdown table comparing key concepts
* Bulleted list of implementation steps
* Code snippet showing complete minimal example
3. Context:
Main body sections:
{main_body_sections}
Reference URLs:
{source_urls}
4. Quality Requirements:
[ ] Meets exact word count
[ ] Uses proper markdown formatting
[ ] Contains no marketing language
[ ] Includes required structural elements
[ ] Links directly to main body content
[ ] Maintains technical focus throughout"""
+56
View File
@@ -0,0 +1,56 @@
import operator
from dataclasses import dataclass, field
from pydantic import BaseModel, Field
from typing_extensions import Annotated, List
class Section(BaseModel):
    # One section of the blog: planning metadata plus the written content.
    name: str = Field(description="Name for this section of the report.")
    description: str = Field(description="Brief overview of the main topics and concepts to be covered in this section.")
    content: str = Field(description="The content of the section.")
    # Distinguishes main body sections from the remaining (intro/conclusion) ones
    main_body: bool = Field(description="Whether this is a main body section.")
class Sections(BaseModel):
    # Container for a list of Section objects.
    sections: List[Section] = Field(description="Sections of the report.")
@dataclass(kw_only=True)
class BlogState:
    # Keyword-only state container carrying inputs, plan, written sections and final text.
    transcribed_notes_file: str = None  # Blog notes
    urls: List[str] = field(default_factory=list)  # List of urls
    sections: list[Section] = field(default_factory=list)  # Planned sections
    completed_sections: Annotated[list, operator.add]  # Send() API key, annotated with operator.add
    blog_main_body_sections: str = None  # Main body sections from research
    final_blog: str = None  # Final report
@dataclass(kw_only=True)
class BlogStateInput:
transcribed_notes_file: str = field(default="blog_notes.txt") # Blog notes
urls: List[str] = field(default_factory=list) # List of urls
@dataclass(kw_only=True)
class BlogStateOutput:
final_blog: str = field(default=None) # Final report
from dataclasses import dataclass, field
from typing import List
@dataclass(kw_only=True)
class SectionState:
    # Keyword-only per-section state: one section plus the context fields below.
    section: Section  # Report section
    transcribed_notes_file: str = None  # Blog notes
    urls: List[str] = field(default_factory=list)  # List of urls
    blog_main_body_sections: str = None  # Main body sections from research
    completed_sections: list[Section] = field(default_factory=list)  # Final key we duplicate in outer state for Send() API
@dataclass(kw_only=True)
class SectionOutputState:
    # Keyword-only container holding just the completed sections list.
    # Final key we duplicate in outer state for Send() API
    completed_sections: list[Section] = field(default_factory=list)
+70
View File
@@ -0,0 +1,70 @@
import logging

from langchain_community.document_loaders import WebBaseLoader

from agent.state import Section
def load_and_format_urls(url_list):
    """Load web pages and render them as one prompt-ready string.

    Args:
        url_list (str or list): Single URL or list of URLs handed to
            ``WebBaseLoader``.

    Returns:
        str: One block per loaded document, each wrapped in ``---`` lines and
            containing the title, source, and description metadata (``N/A``
            when missing) followed by the stripped page content. Blocks are
            separated by a blank line.
    """
    documents = WebBaseLoader(url_list).load()

    rendered = []
    for document in documents:
        meta = document.metadata
        # Metadata header; absent keys fall back to 'N/A'
        header = (
            f"Title: {meta.get('title', 'N/A')}\n"
            f"Source: {meta.get('source', 'N/A')}\n"
            f"Description: {meta.get('description', 'N/A')}\n"
        )
        # Trim surrounding whitespace from the page text
        body = document.page_content.strip()
        rendered.append(f"---\n{header}\nContent:\n{body}\n---")

    return "\n\n".join(rendered)
def read_dictation_file(file_path: str) -> str:
    """Return the text of a dictation transcript, or "" when it cannot be read.

    Reading is best-effort: a missing or unreadable file is reported through
    the ``logging`` module (the project's lint rules disallow ``print``) and an
    empty string is returned instead of raising.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The file's full contents, or an empty string if the file is missing,
        cannot be opened, or cannot be decoded as UTF-8.
    """
    logger = logging.getLogger(__name__)
    try:
        with open(file_path, encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        logger.warning("Dictation file not found at %s", file_path)
    except (OSError, ValueError) as exc:
        # OSError covers permission/directory problems; ValueError covers
        # UnicodeDecodeError and malformed paths.
        logger.warning("Could not read dictation file %s: %s", file_path, exc)
    return ""
def format_sections(sections: list[Section]) -> str:
""" Format a list of sections into a string """
formatted_str = ""
for idx, section in enumerate(sections, 1):
formatted_str += f"""
{'='*60}
Section {idx}: {section.name}
{'='*60}
Description:
{section.description}
Main body:
{section.main_body}
Content:
{section.content if section.content else '[Not yet written]'}
"""