Add code

2026-07-01 20:14:01 -04:00 · 2024-12-13 16:29:52 -08:00
commit 71dab4ee98
11 changed files with 620 additions and 0 deletions
@@ -0,0 +1 @@
+ANTHROPIC_API_KEY=<your_anthropic_api_key>
@@ -0,0 +1,55 @@
+# Robo Blogger
+
+Robo Blogger is an assistant that transforms voice recordings into polished blog posts, making content creation effortless and efficient.
+
+## Quickstart
+
+Set API keys for the LLM of choice (default is Anthropic Claude 3.5 Sonnet):
+```
+export ANTHROPIC_API_KEY=<your_anthropic_api_key>
+```
+
+Clone the repository and launch the assistant [with the LangGraph server](https://langchain-ai.github.io/langgraph/cloud/reference/cli/#dev):
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+git clone https://github.com/langchain-ai/robo_blogger.git
+cd robo_blogger
+uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.11 langgraph dev
+```
+
+You should see the following output and Studio will open in your browser:
+
+- 🚀 API: http://127.0.0.1:2024
+- 🎨 Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
+- 📚 API Docs: http://127.0.0.1:2024/docs
+
+Use a dictation app (e.g., [Flowwise](https://www.flowvoice.ai/)) to dictate some high-level notes about the blog post you want to write:
+
+* Save your dictation to a file in the `notes` folder (e.g., `blog_notes.txt`). 
+* For example, with Flowwise in Cursor, you can simply hold down the `fn` key (on Mac) and dictate your notes.
+
+In Studio inputs:
+* Provide the name of the dictation file (e.g., `blog_notes.txt`) in the `transcribed_notes_file` field.
+* Optionally, provide links to any documentation you want the blog post to draw on in the `urls` field.
+
+In the `configuration` tab, you can optionally customize the blog post structure. 
+
+## Motivation
+
+LangChain blog posts typically follow a consistent structure:
+
+1. High-level overview of the topic
+2. Code documentation and examples
+3. Structured content walkthrough
+
+While this structure is clear, getting from initial thoughts to a polished first draft can be challenging. Robo Blogger streamlines this process by requiring only:
+- A voice recording of your initial thoughts
+- Optional documentation links
+- Optional custom blog structure
+
+The workflow is simple:
+1. **Voice Capture**: Record your thoughts using any dictation app (e.g., Flowwise)
+2. **Planning**: Claude 3.5 Sonnet converts your dictation, links, and structure into a coherent plan
+3. **Writing**: Automated generation of each blog section following the plan
+
+This approach builds on concepts from our previous [Report mAIstro](https://github.com/langchain-ai/report-mAIstro) project.
@@ -0,0 +1,11 @@
+{
+    "dockerfile_lines": [],
+    "graphs": {
+      "robo_blogger": "./src/agent/graph.py:graph"
+    },
+    "python_version": "3.11",
+    "env": "./.env",
+    "dependencies": [
+      "."
+    ]
+  }
@@ -0,0 +1,56 @@
+[project]
+name = "robo-blogger"
+version = "0.0.1"
+description = "Assistant for creating full blog posts from an audio dictation."
+authors = [
+    { name = "Lance Martin" }
+]
+readme = "README.md"
+license = { text = "MIT" }
+# The agent's dataclasses use `kw_only=True`, which requires Python 3.10+.
+requires-python = ">=3.10"
+dependencies = [
+    "langgraph>=0.2.55",
+    "langchain-community>=0.3.9",
+    "langchain-anthropic>=0.3.0",
+    "beautifulsoup4>=4.12.2",
+]
+
+[project.optional-dependencies]
+dev = ["mypy>=1.11.1", "ruff>=0.6.1"]
+
+[build-system]
+requires = ["setuptools>=73.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+packages = ["agent"]
+
+[tool.setuptools.package-dir]
+"agent" = "src/agent"
+
+[tool.setuptools.package-data]
+"*" = ["py.typed"]
+
+[tool.ruff]
+lint.select = [
+    "E",    # pycodestyle
+    "F",    # pyflakes
+    "I",    # isort
+    "D",    # pydocstyle
+    "D401", # First line should be in imperative mood
+    "T201",
+    "UP",
+]
+lint.ignore = [
+    "UP006",
+    "UP007",
+    "UP035",
+    "D417",
+    "E501",
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["D", "UP"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
@@ -0,0 +1,47 @@
+import os
+from dataclasses import dataclass, fields
+from typing import Any, Optional
+
+from langchain_core.runnables import RunnableConfig
+from dataclasses import dataclass
+
+# Default outline the blog post is asked to follow; used as the default value
+# of `Configuration.blog_structure`.
+DEFAULT_BLOG_STRUCTURE = """The blog post should follow this strict three-part structure:
+
+1. Introduction (max 1 section)
+   - Start with ### Key Links and include user-provided links  
+   - Brief overview of the problem statement
+   - Brief overview of the solution/main topic
+   - Maximum 100 words
+
+2. Main Body (exactly 2-3 sections)
+    - Each section must:
+      * Cover a distinct aspect of the main topic
+      * Include at least one relevant code snippet
+      * Be 150-200 words
+    - No overlap between sections
+
+3. Conclusion (max 1 section)
+   - Brief summary of key points
+   - Key Links
+   - Clear call to action
+   - Maximum 150 words"""
+
+@dataclass(kw_only=True)
+class Configuration:
+    """The configurable fields for the chatbot."""
+    blog_structure: str = DEFAULT_BLOG_STRUCTURE
+    
+    @classmethod
+    def from_runnable_config(
+        cls, config: Optional[RunnableConfig] = None
+    ) -> "Configuration":
+        """Create a Configuration instance from a RunnableConfig."""
+        configurable = (
+            config["configurable"] if config and "configurable" in config else {}
+        )
+        values: dict[str, Any] = {
+            f.name: os.environ.get(f.name.upper(), configurable.get(f.name))
+            for f in fields(cls)
+            if f.init
+        }
+        return cls(**{k: v for k, v in values.items() if v})
@@ -0,0 +1,166 @@
+from langchain_anthropic import ChatAnthropic 
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.runnables import RunnableConfig
+
+from langgraph.constants import Send
+from langgraph.graph import START, END, StateGraph
+
+import agent.configuration as configuration
+from agent.state import Sections, BlogState, BlogStateInput, BlogStateOutput, SectionState
+from agent.prompts import blog_planner_instructions, main_body_section_writer_instructions, intro_conclusion_instructions
+from agent.utils import load_and_format_urls, read_dictation_file, format_sections
+
+# ------------------------------------------------------------
+# LLMs 
+# Single module-level chat model (temperature 0) shared by the planning and writing nodes below.
+claude_3_5_sonnet = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0) 
+
+# ------------------------------------------------------------
+# Graph
+def generate_blog_plan(state: BlogState, config: RunnableConfig):
+    """ Generate the report plan """
+
+    # Inputs
+    urls = state.urls
+    transcribed_notes_file = f"notes/{state.transcribed_notes_file}" 
+
+    # Read transcribed notes
+    user_instructions = read_dictation_file(transcribed_notes_file)
+
+    # Load and format urls
+    url_source_str = "" if not urls else load_and_format_urls(urls)
+
+    # Get configuration
+    configurable = configuration.Configuration.from_runnable_config(config)
+    blog_structure = configurable.blog_structure
+
+    # Format system instructions
+    system_instructions_sections = blog_planner_instructions.format(blog_structure=blog_structure, user_instructions=user_instructions, source_urls=url_source_str)
+
+    # Generate sections 
+    structured_llm = claude_3_5_sonnet.with_structured_output(Sections)
+    report_sections = structured_llm.invoke([SystemMessage(content=system_instructions_sections)]+[HumanMessage(content="Generate the sections of the blog. Your response must include a 'sections' field containing a list of sections. Each section must have: name, description, and content fields.")])
+
+    return {"sections": report_sections.sections}
+
+def write_section(state: SectionState):
+    """ Write a section of the report """
+
+    # Get state 
+    section = state.section
+    urls = state.urls
+    transcribed_notes_file = f"notes/{state.transcribed_notes_file}" 
+
+    # Read transcribed notes
+    user_instructions = read_dictation_file(transcribed_notes_file)
+
+    # Load and format urls
+    url_source_str = "" if not urls else load_and_format_urls(urls)
+
+    # Format system instructions
+    system_instructions = main_body_section_writer_instructions.format(section_name=section.name, 
+                                                                       section_topic=section.description, 
+                                                                       user_instructions=user_instructions, 
+                                                                       source_urls=url_source_str)
+
+    # Generate section  
+    section_content = claude_3_5_sonnet.invoke([SystemMessage(content=system_instructions)]+[HumanMessage(content="Generate a blog section based on the provided information.")])
+    
+    # Write content to the section object  
+    section.content = section_content.content
+
+    # Write the updated section to completed sections
+    return {"completed_sections": [section]}
+
+def write_final_sections(state: SectionState):
+    """ Write final sections of the report, which do not require web search and use the completed sections as context """
+
+    # Get state 
+    section = state.section
+    
+    # Format system instructions
+    system_instructions = intro_conclusion_instructions.format(section_name=section.name, 
+                                                               section_topic=section.description, 
+                                                               main_body_sections=state.blog_main_body_sections, 
+                                                               source_urls=state.urls)
+
+    # Generate section  
+    section_content = claude_3_5_sonnet.invoke([SystemMessage(content=system_instructions)]+[HumanMessage(content="Generate an intro/conclusion section based on the provided main body sections.")])
+    
+    # Write content to section 
+    section.content = section_content.content
+
+    # Write the updated section to completed sections
+    return {"completed_sections": [section]}
+
+def initiate_section_writing(state: BlogState):
+    """ This is the "map" step when we kick off web research for some sections of the report """    
+        
+    # Kick off section writing in parallel via Send() API for any sections that require research
+    return [
+        Send("write_section", SectionState(
+            section=s,
+            transcribed_notes_file=state.transcribed_notes_file,
+            urls=state.urls,
+            completed_sections=[]  # Initialize with empty list
+        )) 
+        for s in state.sections 
+        if s.main_body
+    ]
+
+def gather_completed_sections(state: BlogState):
+    """ Gather completed main body sections"""    
+
+    # List of completed sections
+    completed_sections = state.completed_sections
+
+    # Format completed section to str to use as context for final sections
+    completed_report_sections = format_sections(completed_sections)
+
+    return {"blog_main_body_sections": completed_report_sections}
+
+def initiate_final_section_writing(state: BlogState):
+    """ This is the "map" step when we kick off research on any sections that require it using the Send API """    
+
+    # Kick off section writing in parallel via Send() API for any sections that do not require research
+    return [
+        Send("write_final_sections", SectionState(
+            section=s,
+            blog_main_body_sections=state.blog_main_body_sections,
+            urls=state.urls,
+            completed_sections=[]  # Initialize with empty list
+        )) 
+        for s in state.sections 
+        if not s.main_body
+    ]
+
+def compile_final_blog(state: BlogState):
+    """ Compile the final blog """    
+
+    # Get sections
+    sections = state.sections
+    completed_sections = {s.name: s.content for s in state.completed_sections}
+
+    # Update sections with completed content while maintaining original order
+    for section in sections:
+        section.content = completed_sections[section.name]
+
+    # Compile final report
+    all_sections = "\n\n".join([s.content for s in sections])
+
+    return {"final_blog": all_sections}
+
+# Add nodes and edges 
+# Flow: plan -> write main-body sections -> gather -> write remaining sections -> compile
+builder = StateGraph(BlogState, input=BlogStateInput, output=BlogStateOutput, config_schema=configuration.Configuration)
+builder.add_node("generate_blog_plan", generate_blog_plan)
+builder.add_node("write_section", write_section)
+builder.add_node("compile_final_blog", compile_final_blog)
+builder.add_node("gather_completed_sections", gather_completed_sections)
+builder.add_node("write_final_sections", write_final_sections)
+builder.add_edge(START, "generate_blog_plan")
+# Fan out: initiate_section_writing returns one Send per main-body section
+builder.add_conditional_edges("generate_blog_plan", initiate_section_writing, ["write_section"])
+builder.add_edge("write_section", "gather_completed_sections")
+# Fan out again: initiate_final_section_writing returns one Send per non-main-body section
+builder.add_conditional_edges("gather_completed_sections", initiate_final_section_writing, ["write_final_sections"])
+builder.add_edge("write_final_sections", "compile_final_blog")
+builder.add_edge("compile_final_blog", END)
+
+# Compiled graph object; langgraph.json points at it as ./src/agent/graph.py:graph
+graph = builder.compile() 
@@ -0,0 +1,12 @@
+okay need to write blog post about AI memory systems... 
+
+first, we need to explain what memory actually is so lets cover the types of memory
+semantic - like facts and stuff
+episodic - past experiences
+procedural - like instructions and rules
+
+second, we want to cover short term vs long term memory differences
+
+third, we want to cover memory management techniques
+talk about conversation history management
+mention that background vs real-time memory writing thing... pros and cons of each
@@ -0,0 +1,146 @@
+# Blog planner instructions
+# Placeholders filled via str.format: {blog_structure}, {user_instructions}, {source_urls}
+blog_planner_instructions="""You are an expert technical writer, helping to plan a blog post.
+
+Your goal is to generate a CONCISE outline with exactly 4-5 total sections (including intro and conclusion).
+
+The blog must strictly follow this structure: 
+
+{blog_structure}
+
+Rules for section planning:
+1. Generate exactly ONE introduction section
+2. Generate 2-3 main body sections that:
+   - Are clearly distinct from each other
+   - Cover different aspects of the topic
+   - Will include code snippets
+3. Generate exactly ONE conclusion section
+4. Avoid any redundancy between sections
+
+Use this information to plan the sections:
+
+User Instructions:
+{user_instructions}
+
+Source URLs (if provided):
+{source_urls}
+
+For each section, provide:
+- Name - Clear, descriptive section name
+- Description - Give an overview of the specific topics to be covered in this section of the blog
+- Content - Leave blank for now
+- Main Body - Whether this is a main body section
+
+Final check:
+1. Confirm that the sections are non-overlapping in topic and non-redundant
+2. Confirm that each Section Description has a clearly stated scope that does not conflict with other sections"""
+
+# Section writer instructions
+main_body_section_writer_instructions = """You are an expert technical writer crafting one section of a blog post.
+
+CONTEXT:
+Section Name: {section_name}
+Section Topic: {section_topic}
+User Instructions: {user_instructions}
+Reference Material: {source_urls}
+
+WRITING GUIDELINES:
+
+1. Structure:
+- Start with a level-2 heading (##)
+- Break content into 2-3 clear subsections
+- Each paragraph should be 2-3 sentences maximum
+- Include exactly one code example with explanation
+- End with a brief standalone summary of the section's key points (no references to other sections)
+
+2. Style Requirements:
+- Technical and precise language
+- Active voice
+- Zero marketing language
+- Concrete examples over abstract concepts
+- Clear topic sentences
+- Markdown formatting
+
+3. Code Example Requirements:
+- Must be practical and executable
+- Include brief comments explaining key parts
+- Maximum 10 lines of code
+- Must directly relate to section topic
+
+4. Length and Format:
+- Strict 150-200 words (excluding code)
+- Use markdown formatting:
+  * ## for section heading
+  * ``` for code blocks
+  * ** for emphasis when needed
+  * - for bullet points if necessary
+
+QUALITY CHECKLIST:
+[ ] Meets word count (150-200 words)
+[ ] Contains one clear code example
+[ ] Uses proper markdown formatting
+[ ] Maintains technical focus
+[ ] Connects logically to section topic
+[ ] Free of marketing language
+[ ] Includes transition to next section
+
+Generate the section content now, focusing on clarity and technical accuracy."""
+
+# Intro/conclusion instructions
+# Placeholders filled via str.format: {section_name}, {section_topic},
+# {main_body_sections}, {source_urls}
+intro_conclusion_instructions = """You are an expert technical writer crafting the introduction or conclusion of a blog post.
+
+Name for this section:
+{section_name}
+
+Topic for this section:
+{section_topic}
+
+Guidelines for writing:
+
+1. Length and Style:
+- Technical focus with zero jargon
+- Active voice only
+- Each paragraph 2-3 sentences maximum
+- No marketing language or buzzwords
+- Must be self-contained (no "In this blog..." or "As we discussed...")
+
+2. Section-Specific Requirements:
+
+FOR INTRODUCTION:
+- Format: # Title (must be attention-grabbing but technical)
+- Structure:
+  * First paragraph: Hook + problem statement
+  * Second paragraph: Solution overview
+  * Final paragraph: What reader will learn
+- Word limit: Strict 50-100 words
+- Required elements:
+  * ### Key Links section at bottom
+  * One concrete example or use case
+- Prohibited elements:
+  * No lists, tables, or code
+  * No future tense about what "will be covered"
+
+FOR CONCLUSION:
+- Format: ## Summary and Next Steps
+- Structure:
+  * First paragraph: Key takeaways
+  * Second paragraph: Practical applications
+  * Final paragraph: Call to action (focused on technical implementation)
+- Word limit: Strict 100-150 words
+- Choose exactly ONE:
+  * Markdown table comparing key concepts
+  * Bulleted list of implementation steps
+  * Code snippet showing complete minimal example
+
+3. Context:
+Main body sections:
+{main_body_sections}
+
+Reference URLs:
+{source_urls}
+
+4. Quality Requirements:
+[ ] Meets exact word count
+[ ] Uses proper markdown formatting
+[ ] Contains no marketing language
+[ ] Includes required structural elements
+[ ] Links directly to main body content
+[ ] Maintains technical focus throughout"""
@@ -0,0 +1,56 @@
+import operator
+from dataclasses import dataclass, field
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, List
+
+# A single blog section: name, description and main_body flag describe the plan,
+# while content holds the section text once it has been written.
+class Section(BaseModel):
+    # Section title
+    name: str = Field(
+        description="Name for this section of the report.",
+    )
+    # What the section is meant to cover
+    description: str = Field(
+        description="Brief overview of the main topics and concepts to be covered in this section.",
+    )
+    # Section text
+    content: str = Field(
+        description="The content of the section."
+    )   
+    # True for main body sections, False for the other (intro/conclusion) sections
+    main_body: bool = Field(
+        description="Whether this is a main body section."
+    )   
+
+# Container holding the list of planned sections.
+class Sections(BaseModel):
+    sections: List[Section] = Field(
+        description="Sections of the report.",
+    )
+
+@dataclass(kw_only=True)
+class BlogState:
+    """Overall state of the blog-writing graph."""
+
+    transcribed_notes_file: str = field(default=None) # Name of the dictation notes file
+    urls: List[str] = field(default_factory=list) # List of reference urls
+    sections: list[Section] = field(default_factory=list) # Planned sections, in blog order
+    completed_sections: Annotated[list, operator.add] # Send() API key; annotated with operator.add to combine list updates
+    blog_main_body_sections: str = field(default=None) # Formatted main body sections, used as context for the final sections
+    final_blog: str = field(default=None) # Final compiled blog post
+    
+@dataclass(kw_only=True)
+class BlogStateInput:
+    """Input fields of the blog-writing graph."""
+
+    transcribed_notes_file: str = field(default="blog_notes.txt") # Name of the dictation notes file
+    urls: List[str] = field(default_factory=list) # List of reference urls
+
+@dataclass(kw_only=True)
+class BlogStateOutput:
+    """Output fields of the blog-writing graph."""
+
+    final_blog: str = field(default=None) # Final compiled blog post
+
+from dataclasses import dataclass, field
+from typing import List
+
+@dataclass(kw_only=True)
+class SectionState:
+    """State handed to a single section-writing branch."""
+
+    section: Section # Section to write
+    transcribed_notes_file: str = field(default=None) # Name of the dictation notes file
+    urls: List[str] = field(default_factory=list) # List of reference urls
+    blog_main_body_sections: str = field(default=None) # Formatted main body sections, used as context for the final sections
+    completed_sections: list[Section] = field(default_factory=list) # Final key we duplicate in outer state for Send() API
+    
+@dataclass(kw_only=True)
+class SectionOutputState:
+    """Output fields of a section-writing branch."""
+
+    completed_sections: list[Section] = field(default_factory=list) # Final key we duplicate in outer state for Send() API
@@ -0,0 +1,70 @@
+import logging
+
+from langchain_community.document_loaders import WebBaseLoader
+
+from agent.state import Section
+
+def load_and_format_urls(url_list):
+    """Load web pages from URLs and format them into a readable string.
+    
+    Args:
+        url_list (str or list): Single URL or list of URLs to load and format
+        
+    Returns:
+        str: Formatted string containing metadata and content from all loaded documents,
+             separated by '---' delimiters. Each document includes:
+             - Title
+             - Source URL
+             - Description
+             - Page content
+    """
+
+    loader = WebBaseLoader(url_list)
+    docs = loader.load()
+
+    formatted_docs = []
+    
+    for doc in docs:
+        # Format metadata
+        metadata_str = (
+            f"Title: {doc.metadata.get('title', 'N/A')}\n"
+            f"Source: {doc.metadata.get('source', 'N/A')}\n"
+            f"Description: {doc.metadata.get('description', 'N/A')}\n"
+        )
+        
+        # Format content (strip extra whitespace and newlines)
+        content = doc.page_content.strip()
+        
+        # Combine metadata and content
+        formatted_doc = f"---\n{metadata_str}\nContent:\n{content}\n---"
+        formatted_docs.append(formatted_doc)
+    
+    # Join all documents with double newlines
+    return "\n\n".join(formatted_docs)
+
+def read_dictation_file(file_path: str) -> str:
+    """Read content from a text file audio-to-text dictation."""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            return file.read()
+    except FileNotFoundError:
+        print(f"Warning: File not found at {file_path}")
+        return ""
+    except Exception as e:
+        print(f"Error reading file: {e}")
+        return ""
+    
+def format_sections(sections: list[Section]) -> str:
+    """ Format a list of sections into a string """
+    formatted_str = ""
+    for idx, section in enumerate(sections, 1):
+        formatted_str += f"""
+{'='*60}
+Section {idx}: {section.name}
+{'='*60}
+Description:
+{section.description}
+Main body: 
+{section.main_body}
+
+Content:
+{section.content if section.content else '[Not yet written]'}
+
+"""