handle multiple codeblocks (#15)

This commit is contained in:
Vadym Barda
2025-04-05 12:24:48 -04:00
committed by GitHub
parent e732f61178
commit f8cca3a4e4
3 changed files with 241 additions and 5 deletions
+5 -5
View File
@@ -7,6 +7,8 @@ from langchain_core.tools import tool as create_tool
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.types import Command
from langgraph_codeact.utils import extract_and_combine_codeblocks
class CodeActState(MessagesState):
"""State for CodeAct agent."""
@@ -75,11 +77,9 @@ def create_codeact(
def call_model(state: CodeActState) -> Command:
messages = [{"role": "system", "content": prompt}] + state["messages"]
response = model.invoke(messages)
if "```" in response.content:
# get content between fences
code = response.content.split("```")[1]
# remove first line, which is the language or empty string
code = "\n".join(code.splitlines()[1:])
# Extract and combine all code blocks
code = extract_and_combine_codeblocks(response.content)
if code:
return Command(goto="sandbox", update={"messages": [response], "script": code})
else:
# no code block, end the loop and respond to the user
+61
View File
@@ -0,0 +1,61 @@
import re
# Matches a fenced block whose opening ``` starts a line and whose closing ```
# ends a line. The newline after the closing fence is asserted with a
# lookahead instead of being consumed, so a fence that opens on the very next
# line can still be matched as the start of the following block.
BACKTICK_PATTERN = r"(?:^|\n)```(.*?)```(?=\n|$)"


def extract_and_combine_codeblocks(text: str) -> str:
    """
    Extract every fenced codeblock from ``text`` and combine them into one script.

    Args:
        text: A string containing zero or more codeblocks, where each codeblock is
            surrounded by triple backticks (```) placed at the start of a line.

    Returns:
        The code of all non-empty codeblocks, in order of appearance, with
        consecutive codeblocks separated by a blank line. Returns an empty
        string when the text contains no codeblock with content.

    Example:
        text = '''Here's some code:

        ```python
        print('hello')
        ```
        And more:

        ```
        print('world')
        ```'''

        result = extract_and_combine_codeblocks(text)

        Result:

        print('hello')

        print('world')
    """
    processed_blocks = []
    for raw_block in re.findall(BACKTICK_PATTERN, text, re.DOTALL):
        # Only the text on the same line as the opening fence can be a language
        # identifier. Inspect it *before* stripping the block; otherwise the
        # first real code line of an untagged block (e.g. "foo()") would be
        # mistaken for a language identifier and silently dropped.
        info_line, _, remainder = raw_block.partition("\n")
        tag = info_line.strip()
        if not tag or " " not in tag:
            # Empty fence line or a single word such as "python": discard it.
            # Keep the indentation of the first code line, drop blank edges.
            block = remainder.rstrip().lstrip("\n")
        else:
            # The fence line contains spaces, so it is treated as content.
            block = raw_block.strip()

        # Skip blocks without content so they do not add blank separators or
        # make the combined result non-empty when there is no code at all.
        if block:
            processed_blocks.append(block)

    return "\n\n".join(processed_blocks)
+175
View File
@@ -0,0 +1,175 @@
from langgraph_codeact.utils import extract_and_combine_codeblocks
def test_empty_text():
    """Text that contains no fenced block produces an empty string."""
    plain_text = "This is a text without any code blocks."
    assert extract_and_combine_codeblocks(plain_text) == ""
def test_single_codeblock_no_language():
    """A single fenced block without a language identifier yields its code."""
    source = """Here is a code block:
```
print("Hello, world!")
x = 10
```
End of the code."""
    wanted = """\
print("Hello, world!")
x = 10\
"""
    # The surrounding prose and the fences themselves must not leak through.
    assert extract_and_combine_codeblocks(source) == wanted
def test_single_codeblock_with_language():
    """A single fenced block tagged ``python`` yields its code without the tag."""
    source = """Here is a code block:
```python
print("Hello, world!")
x = 10
```
End of the code."""
    wanted = """\
print("Hello, world!")
x = 10\
"""
    # The "python" identifier on the fence line is not part of the code.
    assert extract_and_combine_codeblocks(source) == wanted
def test_multiple_codeblocks():
    """Test extracting and combining multiple codeblocks."""
    text = """Here's the first code block:
```python
def hello():
print("Hello!")
```
And here's the second one:
```python
result = 42
print(f"The answer is {result}")
```"""
    # Combined blocks are joined with "\n\n", so a blank line separates them.
    expected = """\
def hello():
print("Hello!")

result = 42
print(f"The answer is {result}")\
"""
    result = extract_and_combine_codeblocks(text)
    assert result == expected
def test_multiple_codeblocks_mixed():
    """Test codeblocks with a mix of language identifiers / no identifiers."""
    # Blocks are separated by blank lines so that every opening fence is
    # preceded by its own newline.
    text = """Different language identifiers:

```python
x = 10
```

```python
y = 20
```

```
z = 30
```"""
    # Combined blocks are joined with "\n\n", so a blank line separates them.
    expected = """\
x = 10

y = 20

z = 30\
"""
    result = extract_and_combine_codeblocks(text)
    assert result == expected
def test_empty_codeblock():
    """Back-to-back fences in the middle of a line produce no code."""
    inline_fences = "Empty block: `````` should be ignored."
    assert extract_and_combine_codeblocks(inline_fences) == ""
def test_language_with_spaces():
    """A fence line that contains spaces is kept as part of the code."""
    source = """Here is code with a more unusual language tag:
```python code
x = 10
y = 20
```"""
    # Only a space-free first line is treated as a language identifier, so
    # "python code" stays in the output.
    wanted = """\
python code
x = 10
y = 20\
"""
    assert extract_and_combine_codeblocks(source) == wanted
def test_with_nested_backticks():
    """Triple backticks inside a line of code do not terminate the block."""
    source = """Code with nested backticks:
```
def example():
code = "```nested```"
return code
```"""
    # The inner fences are followed by other characters on the same line, so
    # only the final fence closes the block.
    wanted = """\
def example():
code = "```nested```"
return code\
"""
    assert extract_and_combine_codeblocks(source) == wanted
def test_realistic_example():
    """Test a realistic model reply: prose interleaved with two codeblocks."""
    text = """First, I'll find where the baseball lands when hit by the batter. Then, I'll calculate where the ball lands after being thrown by the outfielder.
```python
# Constants
g = 9.81 # acceleration due to gravity
v0_batter = 45.847 # initial velocity
angle_batter_deg = 23.474 # angle in degrees
print(f"The ball lands {distance:.2f} meters away")
```
Now, let's calculate the second trajectory:
```
# Outfielder's throw
v0_outfielder = 24.12 # initial velocity
distance_2 = v0_outfielder * 2 # simplified calculation
print(f"Final position: {distance_2:.2f} meters")
```"""
    # Combined blocks are joined with "\n\n", so a blank line separates them.
    expected = """\
# Constants
g = 9.81 # acceleration due to gravity
v0_batter = 45.847 # initial velocity
angle_batter_deg = 23.474 # angle in degrees
print(f"The ball lands {distance:.2f} meters away")

# Outfielder's throw
v0_outfielder = 24.12 # initial velocity
distance_2 = v0_outfielder * 2 # simplified calculation
print(f"Final position: {distance_2:.2f} meters")\
"""
    result = extract_and_combine_codeblocks(text)
    assert result == expected