mirror of
https://github.com/run-llama/image-generation-agent.git
synced 2026-06-30 21:17:55 -04:00
first commit
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
name: Linting
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install 3.12
|
||||
|
||||
- name: Install pre-commit
|
||||
shell: bash
|
||||
run: uv venv && source .venv/bin/activate && uv pip install pre-commit
|
||||
|
||||
- name: Run linter
|
||||
shell: bash
|
||||
run: uv run -- pre-commit run -a
|
||||
@@ -0,0 +1,5 @@
|
||||
.venv/
|
||||
.env
|
||||
scripts/.env
|
||||
*/__pycache__/
|
||||
scripts/output.png
|
||||
@@ -0,0 +1,67 @@
|
||||
---
|
||||
default_language_version:
|
||||
python: python3
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.5.0
|
||||
hooks:
|
||||
- id: check-byte-order-marker
|
||||
- id: check-merge-conflict
|
||||
- id: check-symlinks
|
||||
- id: check-toml
|
||||
- id: check-yaml
|
||||
- id: detect-private-key
|
||||
- id: end-of-file-fixer
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
|
||||
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||
rev: v0.11.8
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--exit-non-zero-on-fix, --fix]
|
||||
exclude: ".*poetry.lock|.*_static"
|
||||
|
||||
# Static type checking via the mirrors-mypy pre-commit hook.
- repo: https://github.com/pre-commit/mirrors-mypy
  rev: v1.0.1
  hooks:
    - id: mypy
      # Stub packages installed into the hook's environment.
      additional_dependencies:
        [
          "types-requests",
          "types-Deprecated",
          "types-redis",
          "types-setuptools",
          "types-PyYAML",
          "types-protobuf==4.24.0.4",
        ]
      args:
        [
          --namespace-packages,
          --explicit-package-bases,
          --disallow-untyped-defs,
          --ignore-missing-imports,
          --python-version=3.9,
        ]
      # NOTE(review): this `entry` replaces the hook's default command with a
      # bash one-liner that only exports MYPYPATH and never invokes mypy, so
      # the hook presumably always passes without type-checking anything.
      # `ingest_anything` is also not referenced anywhere else in this
      # commit. Confirm, then either drop `entry` or make it actually run mypy.
      entry: bash -c "export MYPYPATH=ingest_anything"
|
||||
|
||||
- repo: https://github.com/psf/black-pre-commit-mirror
|
||||
rev: 23.10.1
|
||||
hooks:
|
||||
- id: black-jupyter
|
||||
name: black-docs-py
|
||||
alias: black
|
||||
files: ^(docs/|examples/)
|
||||
# Using PEP 8's line length in docs prevents excess left/right scrolling
|
||||
args: [--line-length=79]
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v3.0.3
|
||||
hooks:
|
||||
- id: prettier
|
||||
|
||||
- repo: https://github.com/pappasam/toml-sort
|
||||
rev: v0.23.1
|
||||
hooks:
|
||||
- id: toml-sort-fix
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Jerry Liu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
+178
@@ -0,0 +1,178 @@
|
||||
[build-system]
|
||||
build-backend = "hatchling.build"
|
||||
requires = ["hatchling"]
|
||||
|
||||
# Ruff only reads settings under `[tool.ruff]` in pyproject.toml; the bare
# `[lint.*]` form is the ruff.toml spelling and is ignored here.
[tool.ruff.lint.flake8-annotations]
mypy-init-return = true

[tool.ruff.lint.pydocstyle]
convention = "google"
|
||||
|
||||
[project]
|
||||
authors = [{email = "clelia@runllama.ai", name = "Clelia Astra Bertelli"}]
|
||||
classifiers = [
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
]
|
||||
dependencies = [
|
||||
"fastapi>=0.115.12",
|
||||
"gradio>=3.36.1",
|
||||
"llama-index>=0.12.36,<0.13",
|
||||
"llama-index-core>=0.12.36,<0.13",
|
||||
"llama-index-llms-google-genai>=0.1.13,<0.2",
|
||||
"openai>=1.79.0",
|
||||
"orjson>=3.10.18",
|
||||
"pre-commit>=4.2.0",
|
||||
"uvicorn>=0.34.2",
|
||||
]
|
||||
description = "Interface between LLMs and your data"
|
||||
keywords = [
|
||||
"LLM",
|
||||
"NLP",
|
||||
"RAG",
|
||||
"data",
|
||||
"devtools",
|
||||
"index",
|
||||
"retrieval",
|
||||
]
|
||||
license = "MIT"
|
||||
name = "gemini-multimodal-agentworkflow"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9,<4.0"
|
||||
version = "0.1.0"
|
||||
|
||||
[project.urls]
|
||||
Repository = "https://github.com/AstraBert/gemini-multimodal-agentworkflow"
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
include = ["_llama-index/llama_index"]
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
include = ["_llama-index/llama_index"]
|
||||
|
||||
[tool.hatch.build.targets.wheel.sources]
|
||||
"_llama-index/llama_index" = "llama_index"
|
||||
|
||||
[tool.mypy]
|
||||
disallow_untyped_defs = true
|
||||
# Remove venv skip when integrated with pre-commit
|
||||
exclude = ["_static", "build", "examples", "llama_index/ingestion/client", "notebooks", "venv"]
|
||||
explicit_package_bases = true
|
||||
ignore_missing_imports = true
|
||||
mypy_path = "llama_index"
|
||||
namespace_packages = true
|
||||
plugins = "pydantic.mypy"
|
||||
python_version = "3.9"
|
||||
|
||||
[tool.ruff]
|
||||
exclude = [
|
||||
"_static",
|
||||
"examples",
|
||||
"llama_index/ingestion/client",
|
||||
"notebooks",
|
||||
]
|
||||
target-version = "py312"
|
||||
lint.ignore = [
|
||||
"COM812", # Too aggressive
|
||||
"D212", # Using D213
|
||||
"D417", # Too aggressive
|
||||
"F541", # Messes with prompts.py
|
||||
"RUF100", # Allow blanket noqa
|
||||
"TC002",
|
||||
"UP", # Remove when we drop Python 3.9
|
||||
"PT001",
|
||||
"E501", # Use best judgement for line-length
|
||||
"E402", # Annoying, use best judgement
|
||||
"PYI063",
|
||||
"ANN204", # this is annoying
|
||||
"D401", # I disagree
|
||||
"D404",
|
||||
]
|
||||
# Feel free to add more here
|
||||
lint.select = [
|
||||
"ANN204",
|
||||
"B009",
|
||||
"B010",
|
||||
"B011",
|
||||
"B013",
|
||||
"B014",
|
||||
"C4",
|
||||
"COM812",
|
||||
"COM819",
|
||||
"D201",
|
||||
"D202",
|
||||
"D204",
|
||||
"D207",
|
||||
"D208",
|
||||
"D209",
|
||||
"D211",
|
||||
"D213",
|
||||
"D214",
|
||||
"D215",
|
||||
"D3",
|
||||
"D4",
|
||||
"E",
|
||||
"EXE004",
|
||||
"F401",
|
||||
"F504",
|
||||
"F541",
|
||||
"F632",
|
||||
"FLY",
|
||||
"G010",
|
||||
"I002",
|
||||
"PERF1",
|
||||
"PIE790",
|
||||
"PIE794",
|
||||
"PIE808",
|
||||
"PIE810",
|
||||
"PLC0414",
|
||||
"PLE2510",
|
||||
"PLE2512",
|
||||
"PLE2513",
|
||||
"PLE2514",
|
||||
"PLE2515",
|
||||
"PLR1711",
|
||||
"PT001",
|
||||
"PT003",
|
||||
"PT006",
|
||||
"PT02",
|
||||
"PTH201",
|
||||
"PYI",
|
||||
"Q",
|
||||
"RET501",
|
||||
"RET502",
|
||||
"RET503",
|
||||
"RET504",
|
||||
"RSE",
|
||||
"RUF005",
|
||||
"RUF010",
|
||||
"RUF015",
|
||||
"RUF1",
|
||||
"SIM101",
|
||||
"SIM103",
|
||||
"SIM109",
|
||||
"SIM118",
|
||||
"SIM2",
|
||||
"SIM300",
|
||||
"SIM9",
|
||||
"TC005",
|
||||
"TD006",
|
||||
"TID",
|
||||
"TRY201",
|
||||
"W",
|
||||
]
|
||||
lint.unfixable = [
|
||||
"ERA001",
|
||||
]
|
||||
|
||||
[tool.tomlsort]
|
||||
all = false
|
||||
in_place = true
|
||||
spaces_before_inline_comment = 2 # Match Python PEP 8
|
||||
spaces_indent_inline_array = 4 # Match Python PEP 8
|
||||
trailing_comma_inline_array = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "nvidia-pypi"
|
||||
url = "https://pypi.nvidia.com"
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 36 KiB |
@@ -0,0 +1,22 @@
|
||||
import gradio as gr
|
||||
import requests as rq
|
||||
|
||||
def generate_image_for_user(prompt: str) -> tuple[str, str, str]:
    """
    Send the prompt to the agent endpoint and return values for the UI.

    Args:
        prompt (str): Text describing the image to generate.

    Returns:
        tuple[str, str, str]: Image file path, agent process text and agent
        response text. On an HTTP error the path is "404.png" and the two
        texts carry a generic message and the error body.

    """
    # NOTE(review): 0.0.0.0 is a bind address; as a destination it is not
    # portable across platforms - consider 127.0.0.1.
    res = rq.post("http://0.0.0.0:8000/agent", json={"prompt": prompt})
    # Any 4xx/5xx status is a failure. A strict `> 400` comparison would send
    # a plain 400 to the success branch, where the JSON keys read below are
    # not guaranteed to exist.
    if res.status_code >= 400:
        return "404.png", "An error has occurred while generating the image", f"Error: {res.text}"
    payload = res.json()  # decode the body once instead of once per key
    return "output.png", payload["process"], payload["response"]
|
||||
|
||||
# Gradio UI definition, exposed as `frontend`.
# NOTE(review): the original indentation of this block was not available; the
# nesting of the Row / Column / Accordion containers below is a best-effort
# reconstruction - confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Citrus(primary_hue="indigo", secondary_hue="teal")) as frontend:
    # Page title and subtitle.
    gr.HTML("<h1 align='center'>Image Generation Agent🎨</h1>")
    gr.HTML("<h2 align='center'>Get stunning AI-generated images!</h2>")
    with gr.Row():
        # Free-text prompt typed by the user.
        usr_txt = gr.Textbox(label="Prompt", placeholder="Describe the image you want here...")
        with gr.Column():
            gen_img = gr.Image(label="Generated Image")
            # Collapsed by default (open=False).
            with gr.Accordion(label="Agent Output", open=False):
                resp = gr.Markdown(label="Agent Response", container=True)
                proc = gr.Markdown(label="Agent Process", container=True)
    with gr.Row():
        # The output order [gen_img, proc, resp] matches the tuple returned by
        # generate_image_for_user: (image path, process, response).
        # `btn` is bound to the return value of .click(), not to the Button.
        btn = gr.Button("Generate🖌️").click(fn=generate_image_for_user, inputs=[usr_txt], outputs=[gen_img, proc, resp])
|
||||
@@ -0,0 +1,31 @@
|
||||
import json
|
||||
from app_frontend import gr, frontend
|
||||
from workflow import workflow
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import ORJSONResponse
|
||||
from pydantic import BaseModel
|
||||
from llama_index.core.agent.workflow import ToolCall, ToolCallResult
|
||||
|
||||
# FastAPI application; responses use ORJSONResponse unless a route overrides it.
app = FastAPI(default_response_class=ORJSONResponse)
|
||||
|
||||
class ApiInput(BaseModel):
    # Request body of the /agent endpoint.
    prompt: str  # passed to the workflow as the user message
|
||||
|
||||
class ApiOutput(BaseModel):
    # Response body of the /agent endpoint.
    process: str  # Markdown log of tool calls and tool call results
    response: str  # final workflow result converted with str()
|
||||
|
||||
@app.post("/agent")
async def run_agent(inpt: ApiInput) -> ApiOutput:
    """
    Run the agent workflow for one prompt and report what it did.

    Args:
        inpt (ApiInput): Request body carrying the user's prompt.

    Returns:
        ApiOutput: Markdown trace of tool calls and tool results, plus the
        final workflow result as a string.

    """
    run_handle = workflow.run(user_msg=inpt.prompt)
    trace_parts = []
    async for evt in run_handle.stream_events():
        # The result check comes first, mirroring the original branch order.
        if isinstance(evt, ToolCallResult):
            trace_parts.append(
                f"Tool call result for **{evt.tool_name}**:\n\n```json\n{evt.tool_output.model_dump_json(indent=4)}\n```\n"
            )
            continue
        if isinstance(evt, ToolCall):
            trace_parts.append(
                f"Calling tool **{evt.tool_name}** with input args:\n\n```json\n{json.dumps(evt.tool_kwargs, indent=4)}\n```\n"
            )
    # Awaiting the handler yields the workflow's final result.
    final_result = await run_handle
    return ApiOutput(process="".join(trace_parts), response=str(final_result))
|
||||
|
||||
# Mount the Gradio frontend on the FastAPI app at the empty path and rebind
# `app` to the returned application.
app = gr.mount_gradio_app(app, frontend, "")
|
||||
@@ -0,0 +1,57 @@
|
||||
import base64
|
||||
import json
|
||||
from pathlib import Path
|
||||
from utils import get_api_keys
|
||||
from openai import AsyncOpenAI
|
||||
from typing import Literal
|
||||
from pydantic import BaseModel, Field
|
||||
from llama_index.llms.google_genai import GoogleGenAI
|
||||
from llama_index.core.llms import ChatMessage, MessageRole, ImageBlock, TextBlock
|
||||
|
||||
|
||||
class ImageEvaluation(BaseModel):
    # Structured evaluation of a generated image; this is the output type
    # passed to llm.as_structured_llm() below.
    # Score from 0 to 100, per the field description.
    faithfulness: int = Field(description="Faithfulness of the generated image to the generation prompt, from 0 to 100")
    # Closed set of quality labels, enforced by the Literal type.
    quality: Literal["low", "mediocre", "average", "upper-intermediate", "high", "very high"] = Field(description="Quality of the image, expressed as one of: 'low', 'mediocre', 'average', 'upper-intermediate', 'high', 'very high'")
    # Description written without reference to the generation prompt.
    prompt_agnostic_description: str = Field(description="Description of the image, agnostic of the image generation prompt")
|
||||
|
||||
# Resolve both API keys once, when the module is imported.
openai_api_key, google_api_key =get_api_keys()
# Async OpenAI client used by generate_image.
async_openai_client = AsyncOpenAI(api_key=openai_api_key)
# Gemini model used by evaluate_generated_image.
llm = GoogleGenAI(model="gemini-2.0-flash", api_key=google_api_key)
# Structured variant of `llm` whose output type is ImageEvaluation.
llm_struct = llm.as_structured_llm(ImageEvaluation)
|
||||
|
||||
async def generate_image(prompt: str = Field(description="The image generation prompt")) -> str:
    """
    This tool is useful to generate images.

    Args:
        prompt (str): The image generation prompt

    """
    # NOTE(review): the docstring above presumably doubles as the tool
    # description shown to the agent, so it is kept short and parallel to
    # evaluate_generated_image.
    # Returns a status message in every case: failures are reported as text
    # instead of being raised.
    try:
        img = await async_openai_client.images.generate(
            model="gpt-image-1",
            prompt=prompt,
            n=1,
            size="1024x1024",
        )
        # The image arrives base64-encoded; decode it and overwrite
        # output.png, the same path evaluate_generated_image reads.
        image_bytes = base64.b64decode(img.data[0].b64_json)
        Path("output.png").write_bytes(image_bytes)
        print("Generated image", flush=True)
        return "Image successfully generated"
    except Exception as e:
        # Deliberately broad: any failure becomes a message for the caller.
        return f"An error occurred during image generation: {e}"
|
||||
|
||||
async def evaluate_generated_image(prompt: str = Field(description="The original prompt used to generate the image")) -> str:
    """
    This tool is useful to evaluate a generated image.

    Args:
        prompt (str): The original prompt used to generate the image

    """
    # NOTE(review): the docstring is left verbatim because it presumably
    # serves as the tool description shown to the agent.
    # One user message carrying output.png plus the evaluation instructions.
    user_message = ChatMessage(
        role=MessageRole.USER,
        blocks=[
            ImageBlock(path=Path("output.png")),
            TextBlock(text=f"Could you (1) evaluate the faithfulness of the attached image to this prompt: '{prompt}', (2) evaluate the quality of the image and (3) produce a description of the image that is agnostic of the prompt that was used to generate it?"),
        ],
    )
    reply = await llm_struct.achat(messages=[user_message])
    # The first block of the reply is parsed as JSON with the ImageEvaluation keys.
    evaluation = json.loads(reply.message.blocks[0].text)
    print("Generated evaluation", flush=True)
    description = evaluation["prompt_agnostic_description"]
    faithfulness = evaluation["faithfulness"]
    quality = evaluation["quality"]
    return f"The generated image can be described as:\n'''\n{description}\n'''\nThe faithfulness of the generated image to the original prompt is: {faithfulness}%.\nThe quality of the image is {quality}."
|
||||
@@ -0,0 +1,18 @@
|
||||
from os import environ as ENV
|
||||
from dotenv import load_dotenv
|
||||
from typing import Tuple
|
||||
|
||||
def get_api_keys() -> Tuple[str, str]:
    """
    Read the OpenAI and Google API keys from the environment.

    Each variable is looked up in turn; when it is absent, load_dotenv() is
    called and the lookup is repeated.

    Returns:
        Tuple[str, str]: The OpenAI key followed by the Google key.

    Raises:
        ValueError: If a key is still missing or empty after the lookup.

    """
    found = []
    for var_name in ("OPENAI_API_KEY", "GOOGLE_API_KEY"):
        value = ENV.get(var_name)
        if value is None:
            # Not exported in the process environment: try a .env file.
            load_dotenv()
            value = ENV.get(var_name)
        if not value:
            raise ValueError(f"There is no {var_name} declared among the environmental variables")
        found.append(value)
    return found[0], found[1]
|
||||
@@ -0,0 +1,22 @@
|
||||
from tools import generate_image, evaluate_generated_image
|
||||
from llama_index.core.agent.workflow import AgentWorkflow, FunctionAgent
|
||||
|
||||
# Single agent that generates an image, evaluates it, and retries with a
# refined prompt until the evaluation is positive.
image_generation_agent = FunctionAgent(
    name="ImageGenerationAgent",
    description="An Agent suitable for internal feedback-driven generation of images",
    tools=[generate_image, evaluate_generated_image],
    # Adjacent string literals are concatenated with nothing in between, so
    # each line ends with an explicit newline; without it the numbered steps
    # run together (e.g. "...toolIf you deem...").
    system_prompt=(
        "You are the ImageGenerationAgent. Your task is to generate images, evaluate them and, based on the feedback from the evaluation, re-generate them or return them to the user. Specifically, you need to follow these steps:\n"
        "1. Generate an image starting from the user's prompt with the 'generate_image' tool.\n"
        "2. Evaluate the generated image using the 'evaluate_generated_image' tool\n"
        "If you deem the evaluation positive:\n"
        "3. Return the image to the user, telling them what you generated\n"
        "Else:\n"
        "3. Refine the prompt for image generation, and go back to step 1\n"
        "Do not stop unless you generated an image that suits the original prompt from the user."
    ),
)
|
||||
|
||||
# Workflow with image_generation_agent as its only agent and as its root agent.
workflow = AgentWorkflow(
    agents = [image_generation_agent],
    root_agent= image_generation_agent.name,
    # presumably seconds - TODO confirm against the AgentWorkflow documentation
    timeout=600,
)
|
||||
Reference in New Issue
Block a user