From a21a33263c1fed45470ccf677f329e8ee782abf2 Mon Sep 17 00:00:00 2001
From: Adrian Lyjak <adrianlyjak@gmail.com>
Date: Sat, 27 Sep 2025 13:13:31 -0400
Subject: [PATCH] Update all of the templates to remove test-proj, and migrate
 from vibe-llama templates

---
 .gitignore                   |   5 ++
 pyproject.toml               |  38 ++++++++++++
 src/web_scraping/__init__.py |   0
 src/web_scraping/workflow.py | 108 +++++++++++++++++++++++++++++++++++
 tests/test_placeholder.py    |  12 ++++
 5 files changed, 163 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 pyproject.toml
 create mode 100644 src/web_scraping/__init__.py
 create mode 100644 src/web_scraping/workflow.py
 create mode 100644 tests/test_placeholder.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..827c8a7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+workflows.db
+.venv
+.env
+package-lock.json
+node_modules
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..bb3cef5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "web-scraping"
+version = "0.1.0"
+description = "A workflow that, given several urls, scrapes and summarizes their content."
+requires-python = ">=3.10"
+readme = "README.md"
+dependencies = [
+  "llama-index-workflows>=2.5.0,<3.0.0",
+  "llama-index-llms-google-genai"
+]
+
+[dependency-groups]
+dev = [
+    "hatch>=1.14.2",
+    "pytest>=8.4.2",
+    "ruff>=0.13.2",
+    "ty>=0.0.1a21",
+]
+
+[tool.hatch.envs.default.scripts]
+format = "ruff format ."
+format-check = "ruff format --check ."
+lint = "ruff check --fix ."
+lint-check = ["ruff check ."]
+typecheck = "ty check src"
+test = "pytest"
+all-check = ["format-check", "lint-check", "test"]
+all-fix = ["format", "lint", "test"]
+
+[tool.llamadeploy]
+env_files = [".env"]
+
+[tool.llamadeploy.workflows]
+default = "web_scraping.workflow:workflow"
diff --git a/src/web_scraping/__init__.py b/src/web_scraping/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/web_scraping/workflow.py b/src/web_scraping/workflow.py
new file mode 100644
index 0000000..d932c89
--- /dev/null
+++ b/src/web_scraping/workflow.py
@@ -0,0 +1,108 @@
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.core.llms import ChatMessage
+from google.genai.types import Tool, GenerateContentConfig, UrlContext
+from typing import Annotated
+from pydantic import BaseModel
+
+from workflows import Workflow, step, Context
+from workflows.events import Event, StartEvent, StopEvent
+from workflows.resource import Resource
+
+
+# Gemini model requested for every summary.
+model_id = "gemini-2.5-flash"
+
+# Tool declaration with only the `url_context` capability set.
+url_context_tool = Tool(url_context=UrlContext())
+
+# Generation settings handed to the LLM: the URL tool plus text-only responses.
+config = GenerateContentConfig(tools=[url_context_tool], response_modalities=["TEXT"])
+
+
+class URLState(BaseModel):  # Shared workflow state (used as Context[URLState]).
+    processed_urls: int = 0  # count of submitted URLs; written by process_urls
+    final_content: str = ""  # per-URL summaries, appended by get_url_content
+
+
+async def get_llm(*args, **kwargs) -> GoogleGenAI:  # Resource factory for steps
+    return GoogleGenAI(model=model_id, generation_config=config)  # args unused
+
+
+class URLReadEvent(Event):  # Sent by process_urls, once per URL to summarise.
+    url: str  # interpolated into the LLM prompt by get_url_content
+
+
+class URLContentEvent(Event):  # Returned by get_url_content per finished URL.
+    content: str  # LLM summary text; "" when the response had no content
+
+
+class WebScrapeWorkflow(Workflow):
+    """Summarise each requested URL in its own step, then join the results."""
+
+    @step
+    async def process_urls(
+        self, ev: StartEvent, ctx: Context[URLState]
+    ) -> URLReadEvent | StopEvent | None:
+        """Record how many URLs to expect and emit one URLReadEvent per URL."""
+        if not ev.urls:
+            return StopEvent(result="")  # no URLs: finalize would never run
+        async with ctx.store.edit_state() as state:
+            state.processed_urls = len(ev.urls)
+        for url in ev.urls:
+            ctx.send_event(URLReadEvent(url=url))
+
+    @step
+    async def get_url_content(
+        self,
+        ev: URLReadEvent,
+        llm: Annotated[GoogleGenAI, Resource(get_llm)],
+        ctx: Context[URLState],
+    ) -> URLContentEvent:
+        """Summarise `ev.url` with the LLM and append the result to shared state."""
+        prompt = f"Can you please summarize the context of this URL: {ev.url}"
+        # Awaiting achat keeps the event loop free while other URL steps run.
+        response = await llm.achat([ChatMessage(role="user", content=prompt)])
+        summary = response.message.content or ""  # never render a literal "None"
+        async with ctx.store.edit_state() as state:
+            state.final_content += f"### Summary for {ev.url}\n\n{summary}\n\n"
+        return URLContentEvent(content=summary)
+
+    @step
+    async def finalize(
+        self, ev: URLContentEvent, ctx: Context[URLState]
+    ) -> StopEvent | None:
+        """Stop with the accumulated report once every expected summary arrived."""
+        state = await ctx.store.get_state()
+        if ctx.collect_events(ev, [URLContentEvent] * state.processed_urls):
+            return StopEvent(result=state.final_content)
+
+
+workflow = WebScrapeWorkflow(timeout=None)  # llamadeploy "default" (see pyproject.toml)
+
+
+async def main(urls: list[str]) -> None:
+    """Run a WebScrapeWorkflow (timeout=300) over `urls` and print its result."""
+    summary = await WebScrapeWorkflow(timeout=300).run(urls=urls)
+    print(f"{summary!s}")
+
+
+if __name__ == "__main__":  # CLI: repeat --url for each page to summarise
+    import asyncio
+    import os
+    from argparse import ArgumentParser
+
+    cli = ArgumentParser()
+    cli.add_argument(
+        "--url",
+        action="append",
+        required=True,
+        help="URLs whose content needs to be summarised",
+    )
+    urls = cli.parse_args().url
+
+    # Fail fast with a clear message when the API key is absent or empty.
+    if os.getenv("GOOGLE_API_KEY") in (None, ""):
+        raise ValueError(
+            "You need to set GOOGLE_API_KEY in your environment before using this workflow"
+        )
+    asyncio.run(main(urls))
diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py
new file mode 100644
index 0000000..3384ea0
--- /dev/null
+++ b/tests/test_placeholder.py
@@ -0,0 +1,12 @@
+"""Placeholder test file.
+
+Replace this with actual tests for your project.
+"""
+
+
+def test_placeholder() -> None:
+    """Always pass; this stands in until the project has real tests.
+
+    Delete this function once real tests have been added to the project.
+    """
+    assert True