first commit

2026-06-30 21:17:55 -04:00 · 2025-05-19 22:26:50 +02:00
commit 74868bf274
14 changed files with 4121 additions and 0 deletions
@@ -0,0 +1,24 @@
+# Runs the repository's pre-commit hooks in CI.
+name: Linting
+
+# Triggered by pull_request events only.
+on:
+  pull_request:
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      # Creates .venv in the workspace and installs pre-commit into it.
+      - name: Install pre-commit
+        shell: bash
+        run: uv venv && source .venv/bin/activate && uv pip install pre-commit
+
+      # -a is pre-commit's short form of --all-files.
+      - name: Run linter
+        shell: bash
+        run: uv run -- pre-commit run -a
@@ -0,0 +1,5 @@
+.venv/
+.env
+scripts/.env
+# Python bytecode caches, at any directory depth
+__pycache__/
+scripts/output.png
@@ -0,0 +1,67 @@
+---
+default_language_version:
+  python: python3
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-byte-order-marker
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-yaml
+      - id: detect-private-key
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+      - id: trailing-whitespace
+
+  # Ruff's pre-commit hooks are published under the astral-sh organization.
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.8
+    hooks:
+      - id: ruff
+        args: [--exit-non-zero-on-fix, --fix]
+        exclude: ".*poetry.lock|.*_static"
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.0.1
+    hooks:
+      - id: mypy
+        # Stub packages installed into the hook's environment.
+        additional_dependencies:
+          [
+            "types-requests",
+            "types-Deprecated",
+            "types-redis",
+            "types-setuptools",
+            "types-PyYAML",
+            "types-protobuf==4.24.0.4",
+          ]
+        args:
+          [
+            --namespace-packages,
+            --explicit-package-bases,
+            --disallow-untyped-defs,
+            --ignore-missing-imports,
+            --python-version=3.9,
+          ]
+        # NOTE(review): this replaces the hook's entry with a shell command that
+        # only exports MYPYPATH; it looks like mypy itself is never invoked, and
+        # "ingest_anything" matches no path visible in this commit — confirm.
+        entry: bash -c "export MYPYPATH=ingest_anything"
+
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 23.10.1
+    hooks:
+      - id: black-jupyter
+        name: black-docs-py
+        alias: black
+        files: ^(docs/|examples/)
+        # Using PEP 8's line length in docs prevents excess left/right scrolling
+        args: [--line-length=79]
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.0.3
+    hooks:
+      - id: prettier
+
+  - repo: https://github.com/pappasam/toml-sort
+    rev: v0.23.1
+    hooks:
+      - id: toml-sort-fix
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Jerry Liu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,3 @@
+# Image Generation Agent
+
+README coming soon!
@@ -0,0 +1,178 @@
+[build-system]
+build-backend = "hatchling.build"
+requires = ["hatchling"]
+
+# NOTE(review): these two tables sit at the document root rather than under
+# [tool.ruff]; Ruff presumably does not read them from here (the other Ruff
+# settings in this file live under [tool.ruff]) — confirm and relocate if so.
+[lint.flake8-annotations]
+mypy-init-return = true
+
+[lint.pydocstyle]
+convention = "google"
+
+[project]
+authors = [{email = "clelia@runllama.ai", name = "Clelia Astra Bertelli"}]
+classifiers = [
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries :: Application Frameworks",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+# pydantic, python-dotenv and requests are imported directly by the scripts,
+# so they are declared here instead of being relied on transitively.
+dependencies = [
+    "fastapi>=0.115.12",
+    "gradio>=3.36.1",
+    "llama-index>=0.12.36,<0.13",
+    "llama-index-core>=0.12.36,<0.13",
+    "llama-index-llms-google-genai>=0.1.13,<0.2",
+    "openai>=1.79.0",
+    "orjson>=3.10.18",
+    "pre-commit>=4.2.0",
+    "pydantic>=2",
+    "python-dotenv>=1.0.0",
+    "requests>=2.31.0",
+    "uvicorn>=0.34.2",
+]
+description = "Image generation agent that generates, evaluates and refines images with a LlamaIndex AgentWorkflow"
+keywords = [
+    "LLM",
+    "NLP",
+    "RAG",
+    "data",
+    "devtools",
+    "index",
+    "retrieval",
+]
+license = "MIT"
+name = "gemini-multimodal-agentworkflow"
+readme = "README.md"
+requires-python = ">=3.9,<4.0"
+version = "0.1.0"
+
+[project.urls]
+Repository = "https://github.com/AstraBert/gemini-multimodal-agentworkflow"
+
+# NOTE(review): both build targets ship "_llama-index/llama_index", a path that
+# does not appear among the files visible in this commit — confirm it exists.
+[tool.hatch.build.targets.sdist]
+include = ["_llama-index/llama_index"]
+
+[tool.hatch.build.targets.wheel]
+include = ["_llama-index/llama_index"]
+
+# Maps the on-disk path to the package name used inside the wheel.
+[tool.hatch.build.targets.wheel.sources]
+"_llama-index/llama_index" = "llama_index"
+
+[tool.mypy]
+disallow_untyped_defs = true
+# Remove venv skip when integrated with pre-commit
+exclude = ["_static", "build", "examples", "llama_index/ingestion/client", "notebooks", "venv"]
+explicit_package_bases = true
+ignore_missing_imports = true
+# NOTE(review): "llama_index" (here and in exclude above) is not a directory
+# visible in this commit — confirm these paths apply to this project.
+mypy_path = "llama_index"
+namespace_packages = true
+plugins = "pydantic.mypy"
+python_version = "3.9"
+
+[tool.ruff]
+exclude = [
+    "_static",
+    "examples",
+    "llama_index/ingestion/client",
+    "notebooks",
+]
+# Lowest interpreter allowed by requires-python (">=3.9"), so Ruff does not
+# assume syntax or library features newer than the project supports.
+target-version = "py39"
+lint.ignore = [
+    "COM812",  # Too aggressive
+    "D212",  # Using D213
+    "D417",  # Too aggressive
+    "F541",  # Messes with prompts.py
+    "RUF100",  # Allow blanket noqa
+    "TC002",
+    "UP",  # Remove when we drop Python 3.9
+    "PT001",
+    "E501",  # Use best judgement for line-length
+    "E402",  # Annoying, use best judgement
+    "PYI063",
+    "ANN204",  # this is annoying
+    "D401",  # I disagree
+    "D404",
+]
+# Feel free to add more here
+lint.select = [
+    "ANN204",
+    "B009",
+    "B010",
+    "B011",
+    "B013",
+    "B014",
+    "C4",
+    "COM812",
+    "COM819",
+    "D201",
+    "D202",
+    "D204",
+    "D207",
+    "D208",
+    "D209",
+    "D211",
+    "D213",
+    "D214",
+    "D215",
+    "D3",
+    "D4",
+    "E",
+    "EXE004",
+    "F401",
+    "F504",
+    "F541",
+    "F632",
+    "FLY",
+    "G010",
+    "I002",
+    "PERF1",
+    "PIE790",
+    "PIE794",
+    "PIE808",
+    "PIE810",
+    "PLC0414",
+    "PLE2510",
+    "PLE2512",
+    "PLE2513",
+    "PLE2514",
+    "PLE2515",
+    "PLR1711",
+    "PT001",
+    "PT003",
+    "PT006",
+    "PT02",
+    "PTH201",
+    "PYI",
+    "Q",
+    "RET501",
+    "RET502",
+    "RET503",
+    "RET504",
+    "RSE",
+    "RUF005",
+    "RUF010",
+    "RUF015",
+    "RUF1",
+    "SIM101",
+    "SIM103",
+    "SIM109",
+    "SIM118",
+    "SIM2",
+    "SIM300",
+    "SIM9",
+    "TC005",
+    "TD006",
+    "TID",
+    "TRY201",
+    "W",
+]
+lint.unfixable = [
+    "ERA001",
+]
+
+[tool.tomlsort]
+all = false
+in_place = true
+spaces_before_inline_comment = 2  # Match Python PEP 8
+spaces_indent_inline_array = 4  # Match Python PEP 8
+trailing_comma_inline_array = true
+
+[[tool.uv.index]]
+name = "nvidia-pypi"
+url = "https://pypi.nvidia.com"
@@ -0,0 +1,22 @@
+import gradio as gr
+import requests as rq
+
+def generate_image_for_user(prompt: str):
+    res = rq.post("http://0.0.0.0:8000/agent", json={"prompt": prompt})
+    if res.status_code > 400:
+        return "404.png", "An error has occurred while generating the image", f"Error: {res.text}"
+    else:
+        return "output.png", res.json()["process"], res.json()["response"]
+
+# Page layout: a prompt box beside the generated image, with a collapsible
+# panel holding the agent's textual output, and a button row underneath.
+with gr.Blocks(theme=gr.themes.Citrus(primary_hue="indigo", secondary_hue="teal")) as frontend:
+    gr.HTML("<h1 align='center'>Image Generation Agent🎨</h1>")
+    gr.HTML("<h2 align='center'>Get stunning AI-generated images!</h2>")
+    with gr.Row():
+        usr_txt = gr.Textbox(label="Prompt", placeholder="Describe the image you want here...")
+        with gr.Column():
+            gen_img = gr.Image(label="Generated Image")
+            # Collapsed by default (open=False).
+            with gr.Accordion(label="Agent Output", open=False):
+                resp = gr.Markdown(label="Agent Response", container=True)
+                proc = gr.Markdown(label="Agent Process", container=True)
+    with gr.Row():
+        # The click handler's three return values go to the image, process and
+        # response components, in that order. Note that btn is bound to the
+        # value returned by .click(), not to the Button itself.
+        btn = gr.Button("Generate🖌️").click(fn=generate_image_for_user, inputs=[usr_txt], outputs=[gen_img, proc, resp])
@@ -0,0 +1,31 @@
+import json
+from app_frontend import gr, frontend
+from workflow import workflow
+from fastapi import FastAPI
+from fastapi.responses import ORJSONResponse
+from pydantic import BaseModel
+from llama_index.core.agent.workflow import ToolCall, ToolCallResult
+
+app = FastAPI(default_response_class=ORJSONResponse)
+
+# Request body accepted by the /agent endpoint.
+class ApiInput(BaseModel):
+    prompt: str  # forwarded to the workflow as the user message
+
+# Response body returned by the /agent endpoint.
+class ApiOutput(BaseModel):
+    process: str  # Markdown log of the tool calls and tool results seen while streaming
+    response: str  # str() of the workflow's final result
+
+@app.post("/agent")
+async def run_agent(inpt: ApiInput) -> ApiOutput:
+    handler = workflow.run(user_msg=inpt.prompt)
+    process = ""
+    async for event in handler.stream_events():
+        if isinstance(event, ToolCallResult):
+            process += f"Tool call result for **{event.tool_name}**:\n\n```json\n{event.tool_output.model_dump_json(indent=4)}\n```\n"
+        elif isinstance(event, ToolCall):
+            process += f"Calling tool **{event.tool_name}** with input args:\n\n```json\n{json.dumps(event.tool_kwargs, indent=4)}\n```\n"
+    response = await handler
+    response = str(response)
+    return ApiOutput(process=process, response=response)
+
+app = gr.mount_gradio_app(app, frontend, "")
@@ -0,0 +1,57 @@
+import base64
+import json
+from pathlib import Path
+from utils import get_api_keys
+from openai import AsyncOpenAI
+from typing import Literal
+from pydantic import BaseModel, Field
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.core.llms import ChatMessage, MessageRole, ImageBlock, TextBlock
+
+
+# Schema of the structured evaluation requested from the LLM (see llm_struct).
+class ImageEvaluation(BaseModel):
+    # Score from 0 to 100, per the field description.
+    faithfulness: int = Field(description="Faithfulness of the generated image to the generation prompt, from 0 to 100")
+    # One of six fixed quality labels.
+    quality: Literal["low", "mediocre", "average", "upper-intermediate", "high", "very high"] = Field(description="Quality of the image, expressed as one of: 'low', 'mediocre', 'average', 'upper-intermediate', 'high', 'very high'")
+    # Free-text description that does not refer to the generation prompt.
+    prompt_agnostic_description: str = Field(description="Description of the image, agnostic of the image generation prompt")
+
+openai_api_key, google_api_key =get_api_keys()
+async_openai_client = AsyncOpenAI(api_key=openai_api_key)
+llm = GoogleGenAI(model="gemini-2.0-flash", api_key=google_api_key)
+llm_struct = llm.as_structured_llm(ImageEvaluation)
+
+async def generate_image(prompt: str = Field(description="The image generation prompt")) -> str:
+    """
+    This tool useful to generate images.
+
+    Args:
+        prompt (str): The image generation prompt
+
+    """
+    try:
+        img = await async_openai_client.images.generate(
+            model="gpt-image-1",
+            prompt=prompt,
+            n=1,
+            size="1024x1024"
+        )
+        image_bytes = base64.b64decode(img.data[0].b64_json)
+        with open("output.png", "wb") as f:
+            f.write(image_bytes)
+        print("Generated image", flush=True)
+        return "Image successfully generated"
+    except Exception as e:
+        return f"An error occurred during image generation: {e.__str__()}"
+
+async def evaluate_generated_image(prompt: str = Field(description="The original prompt used to generate the image")) -> str:
+    """
+    This tool is useful to evaluate a generated image.
+
+    Args:
+        prompt (str): The original prompt used to generate the image
+
+    """
+    messages = [ChatMessage(role=MessageRole.USER, blocks=[ImageBlock(path=Path("output.png")), TextBlock(text=f"Could you (1) evaluate the faithfulness of the attached image to this prompt: '{prompt}', (2) evaluate the quality of the image and (3) produce a description of the image that is agnostic of the prompt that was used to generate it?")])]
+    resp = await llm_struct.achat(messages=messages)
+    struct_output = json.loads(resp.message.blocks[0].text)
+    print("Generated evaluation", flush=True)
+    return f"The generated image can be described as:\n'''\n{struct_output['prompt_agnostic_description']}\n'''\nThe faithfulness of the generated image to the original prompt is: {struct_output['faithfulness']}%.\nThe quality of the image is {struct_output['quality']}."
@@ -0,0 +1,18 @@
+from os import environ as ENV
+from dotenv import load_dotenv
+from typing import Tuple
+
+def get_api_keys() -> Tuple[str, str]:
+    openai_api_key = ENV.get("OPENAI_API_KEY", None)
+    if openai_api_key is None:
+        load_dotenv()
+        openai_api_key = ENV.get("OPENAI_API_KEY", None)
+        if not openai_api_key:
+            raise ValueError("There is no OPENAI_API_KEY declared among the environmental variables")
+    google_api_key = ENV.get("GOOGLE_API_KEY", None)
+    if google_api_key is None:
+        load_dotenv()
+        google_api_key = ENV.get("GOOGLE_API_KEY", None)
+        if not google_api_key:
+            raise ValueError("There is no GOOGLE_API_KEY declared among the environmental variables")
+    return openai_api_key, google_api_key
@@ -0,0 +1,22 @@
+from tools import generate_image, evaluate_generated_image
+from llama_index.core.agent.workflow import AgentWorkflow, FunctionAgent
+
+image_generation_agent = FunctionAgent(
+    name = "ImageGenerationAgent",
+    description= "An Agent suitable for internal feedback-driven generation of  images",
+    tools = [generate_image, evaluate_generated_image],
+    system_prompt = "You are the ImageGenerationAgent. Your task is to generate images, evaluate them and, based on the feedback from the evaluation, re-generate them or return them to the user. Specifically, you need to follow these steps:" \
+    "1. Generate an image starting from the user's prompt with the 'generate_image' tool." \
+    "2. Evaluate the generated image using the 'evaluate_generated_image' tool" \
+    "If you deem the evaluation positive:" \
+    "3. Return the image to the user, telling them what you generated" \
+    "Else:" \
+    "3. Refine the prompt for image generation, and go back to step 1" \
+    "Do not stop unless you generated an image that suits the original prompt from the user.",
+)
+
+workflow = AgentWorkflow(
+    agents = [image_generation_agent],
+    root_agent= image_generation_agent.name,
+    timeout=600,
+)