first commit

This commit is contained in:
Astra Clelia Bertelli
2025-05-19 22:26:50 +02:00
commit 74868bf274
14 changed files with 4121 additions and 0 deletions
+24
View File
@@ -0,0 +1,24 @@
# Lint every pull request by running the repository's pre-commit hooks.
name: Linting

on:
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Set up Python
        run: uv python install 3.12

      # Create a local .venv and install pre-commit into it.
      - name: Install pre-commit
        shell: bash
        run: uv venv && source .venv/bin/activate && uv pip install pre-commit

      # Run the configured hooks through uv.
      - name: Run linter
        shell: bash
        run: uv run -- pre-commit run -a
+5
View File
@@ -0,0 +1,5 @@
# Local virtual environment
.venv/
# Environment files holding API keys
.env
scripts/.env
# Python bytecode caches at any depth (a leading "*/" would only match
# caches exactly one directory below the repository root)
__pycache__/
# Generated image artifact
scripts/output.png
+67
View File
@@ -0,0 +1,67 @@
---
# pre-commit hook configuration.
default_language_version:
python: python3
repos:
# General file-hygiene checks.
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-byte-order-marker
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: detect-private-key
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
# Ruff linter with auto-fix; the hook exits non-zero whenever it fixed something.
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.11.8
hooks:
- id: ruff
args: [--exit-non-zero-on-fix, --fix]
exclude: ".*poetry.lock|.*_static"
# Static type checking.
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.0.1
hooks:
- id: mypy
additional_dependencies:
[
"types-requests",
"types-Deprecated",
"types-redis",
"types-setuptools",
"types-PyYAML",
"types-protobuf==4.24.0.4",
]
args:
[
--namespace-packages,
--explicit-package-bases,
--disallow-untyped-defs,
--ignore-missing-imports,
--python-version=3.9,
]
# NOTE(review): `ingest_anything` does not appear anywhere else in the visible
# files of this repository, and this `entry` looks like it replaces the hook's
# default mypy command with a bare `export` — if so, mypy never actually runs.
# Confirm, then either drop `entry` or remove the hook.
entry: bash -c "export MYPYPATH=ingest_anything"
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.10.1
hooks:
- id: black-jupyter
name: black-docs-py
alias: black
# Only files under docs/ or examples/ are formatted by this hook.
files: ^(docs/|examples/)
# Using PEP 8's line length in docs prevents excess left/right scrolling
args: [--line-length=79]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.3
hooks:
- id: prettier
# Keeps TOML files sorted; configured by [tool.tomlsort] in pyproject.toml.
- repo: https://github.com/pappasam/toml-sort
rev: v0.23.1
hooks:
- id: toml-sort-fix
View File
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Jerry Liu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+3
View File
@@ -0,0 +1,3 @@
# Image Generation Agent
README coming soon!
+178
View File
@@ -0,0 +1,178 @@
# Build the package with the Hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# In pyproject.toml, Ruff only reads its settings from under [tool.ruff];
# the bare [lint.*] form is the ruff.toml spelling and is not picked up here.
[tool.ruff.lint.flake8-annotations]
mypy-init-return = true

[tool.ruff.lint.pydocstyle]
convention = "google"
[project]
authors = [{email = "clelia@runllama.ai", name = "Clelia Astra Bertelli"}]
classifiers = [
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Application Frameworks",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
# Every third-party package the application code imports directly is declared
# here (pydantic, python-dotenv and requests included) rather than relying on
# it being pulled in transitively by another dependency.
dependencies = [
    "fastapi>=0.115.12",
    "gradio>=3.36.1",
    "llama-index>=0.12.36,<0.13",
    "llama-index-core>=0.12.36,<0.13",
    "llama-index-llms-google-genai>=0.1.13,<0.2",
    "openai>=1.79.0",
    "orjson>=3.10.18",
    "pre-commit>=4.2.0",
    "pydantic>=2.0",
    "python-dotenv>=1.0.0",
    "requests>=2.31.0",
    "uvicorn>=0.34.2",
]
description = "Agent workflow that generates images with OpenAI and evaluates them with Gemini"
keywords = [
    "LLM",
    "NLP",
    "RAG",
    "data",
    "devtools",
    "index",
    "retrieval",
]
license = "MIT"
name = "gemini-multimodal-agentworkflow"
readme = "README.md"
requires-python = ">=3.9,<4.0"
version = "0.1.0"

[project.urls]
Repository = "https://github.com/AstraBert/gemini-multimodal-agentworkflow"
# Hatch build targets: which paths go into the sdist and the wheel.
# NOTE(review): `_llama-index/llama_index` is not referenced by any other
# visible file in this repository — presumably inherited from another
# project's configuration. Confirm the path exists, otherwise the built
# artifacts may not contain the application code.
[tool.hatch.build.targets.sdist]
include = ["_llama-index/llama_index"]
[tool.hatch.build.targets.wheel]
include = ["_llama-index/llama_index"]
# Maps the included source directory to the `llama_index` path inside the wheel.
[tool.hatch.build.targets.wheel.sources]
"_llama-index/llama_index" = "llama_index"
# mypy settings: untyped function definitions are rejected, missing third-party
# stubs are tolerated.
[tool.mypy]
disallow_untyped_defs = true
# Remove venv skip when integrated with pre-commit
# NOTE(review): the `llama_index/...` exclude and `mypy_path` below point at a
# directory that no visible file in this repository references — confirm they
# are still wanted.
exclude = ["_static", "build", "examples", "llama_index/ingestion/client", "notebooks", "venv"]
explicit_package_bases = true
ignore_missing_imports = true
mypy_path = "llama_index"
namespace_packages = true
# NOTE(review): presumably requires pydantic to be importable in the
# environment mypy runs in — verify for the pre-commit hook environment.
plugins = "pydantic.mypy"
python_version = "3.9"
[tool.ruff]
exclude = [
    "_static",
    "examples",
    "llama_index/ingestion/client",
    "notebooks",
]
# target-version is the minimum interpreter Ruff may assume, so it follows the
# package's declared floor (requires-python >=3.9), not the version CI runs.
target-version = "py39"
# Rules switched off project-wide, kept in alphabetical order.
lint.ignore = [
    "ANN204",  # this is annoying
    "COM812",  # Too aggressive
    "D212",  # Using D213
    "D401",  # I disagree
    "D404",
    "D417",  # Too aggressive
    "E402",  # Annoying, use best judgement
    "E501",  # Use best judgement for line-length
    "F541",  # Messes with prompts.py
    "PT001",
    "PYI063",
    "RUF100",  # Allow blanket noqa
    "TC002",
    "UP",  # Remove when we drop Python 3.9
]
# Rules (or rule prefixes) that Ruff enforces.
# Feel free to add more here
# NOTE(review): ANN204, COM812, F541 and PT001 are listed both here and in
# lint.ignore; presumably the ignore entry wins — confirm, and drop them from
# one of the two lists.
lint.select = [
"ANN204",
"B009",
"B010",
"B011",
"B013",
"B014",
"C4",
"COM812",
"COM819",
"D201",
"D202",
"D204",
"D207",
"D208",
"D209",
"D211",
"D213",
"D214",
"D215",
"D3",
"D4",
"E",
"EXE004",
"F401",
"F504",
"F541",
"F632",
"FLY",
"G010",
"I002",
"PERF1",
"PIE790",
"PIE794",
"PIE808",
"PIE810",
"PLC0414",
"PLE2510",
"PLE2512",
"PLE2513",
"PLE2514",
"PLE2515",
"PLR1711",
"PT001",
"PT003",
"PT006",
"PT02",
"PTH201",
"PYI",
"Q",
"RET501",
"RET502",
"RET503",
"RET504",
"RSE",
"RUF005",
"RUF010",
"RUF015",
"RUF1",
"SIM101",
"SIM103",
"SIM109",
"SIM118",
"SIM2",
"SIM300",
"SIM9",
"TC005",
"TD006",
"TID",
"TRY201",
"W",
]
# Rules Ruff may report but must never auto-fix.
lint.unfixable = ["ERA001"]
# Formatting options for toml-sort.
[tool.tomlsort]
all = false
in_place = true
# Match Python PEP 8
spaces_before_inline_comment = 2
# Match Python PEP 8
spaces_indent_inline_array = 4
trailing_comma_inline_array = true
# Extra package index made available to uv.
[[tool.uv.index]]
url = "https://pypi.nvidia.com"
name = "nvidia-pypi"
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

+22
View File
@@ -0,0 +1,22 @@
import gradio as gr
import requests as rq
def generate_image_for_user(prompt: str):
    """
    Ask the backend agent for an image and shape its reply for the Gradio UI.

    Args:
        prompt (str): Image description typed by the user.

    Returns:
        tuple: ``(image_path, process_markdown, response_markdown)``. On
        failure the image is the ``404.png`` placeholder and the last element
        carries the error text.
    """
    error_image = "404.png"
    error_process = "An error has occurred while generating the image"
    try:
        res = rq.post("http://0.0.0.0:8000/agent", json={"prompt": prompt})
    except rq.exceptions.RequestException as exc:
        # Backend unreachable or connection dropped: show the placeholder
        # instead of letting the exception surface as a raw UI error.
        return error_image, error_process, f"Error: {exc}"
    # `>=` so that a plain 400 Bad Request is treated as an error as well.
    if res.status_code >= 400:
        return error_image, error_process, f"Error: {res.text}"
    payload = res.json()  # decode the body once and reuse it
    return "output.png", payload["process"], payload["response"]
# Gradio UI definition, bound to the name `frontend`.
# NOTE(review): the indentation of this block is not recoverable from this
# view, so the nesting of the Row / Column / Accordion containers below must
# be confirmed against the real file.
with gr.Blocks(theme=gr.themes.Citrus(primary_hue="indigo", secondary_hue="teal")) as frontend:
# Page title and subtitle.
gr.HTML("<h1 align='center'>Image Generation Agent🎨</h1>")
gr.HTML("<h2 align='center'>Get stunning AI-generated images!</h2>")
with gr.Row():
# Prompt typed by the user; the only input of the click handler.
usr_txt = gr.Textbox(label="Prompt", placeholder="Describe the image you want here...")
with gr.Column():
gen_img = gr.Image(label="Generated Image")
# Collapsed by default (open=False).
with gr.Accordion(label="Agent Output", open=False):
resp = gr.Markdown(label="Agent Response", container=True)
proc = gr.Markdown(label="Agent Process", container=True)
with gr.Row():
# The handler's three return values fill gen_img, proc and resp, in that order.
# Note that `btn` is bound to the return value of `.click(...)`, not to the Button.
btn = gr.Button("Generate🖌️").click(fn=generate_image_for_user, inputs=[usr_txt], outputs=[gen_img, proc, resp])
+31
View File
@@ -0,0 +1,31 @@
import json
from app_frontend import gr, frontend
from workflow import workflow
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse
from pydantic import BaseModel
from llama_index.core.agent.workflow import ToolCall, ToolCallResult
# FastAPI application; ORJSONResponse is set as the default response class.
app = FastAPI(default_response_class=ORJSONResponse)
class ApiInput(BaseModel):
    # Request payload accepted by POST /agent.

    # Free-text prompt; handed to the workflow as the user message.
    prompt: str
class ApiOutput(BaseModel):
    # Response payload returned by POST /agent.

    # Markdown log of the tool calls and tool results seen during the run.
    process: str
    # Final result of the workflow, converted to a string.
    response: str
# POST /agent: run the agent workflow on the submitted prompt and return both
# a Markdown trace of its tool activity and its final answer.
@app.post("/agent")
async def run_agent(inpt: ApiInput) -> ApiOutput:
    run = workflow.run(user_msg=inpt.prompt)
    trace_parts = []
    async for evt in run.stream_events():
        # Results are tested before calls, exactly as in the original ordering.
        if isinstance(evt, ToolCallResult):
            trace_parts.append(
                f"Tool call result for **{evt.tool_name}**:\n\n```json\n{evt.tool_output.model_dump_json(indent=4)}\n```\n"
            )
        elif isinstance(evt, ToolCall):
            trace_parts.append(
                f"Calling tool **{evt.tool_name}** with input args:\n\n```json\n{json.dumps(evt.tool_kwargs, indent=4)}\n```\n"
            )
    # Awaiting the handler yields the workflow's final result.
    final_answer = await run
    return ApiOutput(process="".join(trace_parts), response=str(final_answer))
# Mount the Gradio `frontend` on the FastAPI app (the path argument is the
# empty string) and rebind `app` to whatever mount_gradio_app returns.
app = gr.mount_gradio_app(app, frontend, "")
+57
View File
@@ -0,0 +1,57 @@
import base64
import json
from pathlib import Path
from utils import get_api_keys
from openai import AsyncOpenAI
from typing import Literal
from pydantic import BaseModel, Field
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.llms import ChatMessage, MessageRole, ImageBlock, TextBlock
class ImageEvaluation(BaseModel):
    # Structured verdict on a generated image; used as the output schema of
    # the structured LLM built from it.
    faithfulness: int = Field(
        description="Faithfulness of the generated image to the generation prompt, from 0 to 100",
    )
    quality: Literal[
        "low", "mediocre", "average", "upper-intermediate", "high", "very high"
    ] = Field(
        description="Quality of the image, expressed as one of: 'low', 'mediocre', 'average', 'upper-intermediate', 'high', 'very high'",
    )
    prompt_agnostic_description: str = Field(
        description="Description of the image, agnostic of the image generation prompt",
    )
# Both API keys are resolved once, at import time.
openai_api_key, google_api_key = get_api_keys()

# OpenAI client used by the image generation tool.
async_openai_client = AsyncOpenAI(api_key=openai_api_key)

# Gemini model used by the evaluation tool, plus a wrapper whose output is
# structured as an ImageEvaluation.
llm = GoogleGenAI(model="gemini-2.0-flash", api_key=google_api_key)
llm_struct = llm.as_structured_llm(ImageEvaluation)
async def generate_image(prompt: str = Field(description="The image generation prompt")) -> str:
    """
    This tool useful to generate images.
    Args:
        prompt (str): The image generation prompt
    """
    # Any failure below is reported back as text rather than raised.
    try:
        result = await async_openai_client.images.generate(
            model="gpt-image-1", prompt=prompt, n=1, size="1024x1024"
        )
        # The image arrives base64-encoded; decode it and overwrite output.png
        # in the current working directory.
        png_bytes = base64.b64decode(result.data[0].b64_json)
        Path("output.png").write_bytes(png_bytes)
        print("Generated image", flush=True)
        return "Image successfully generated"
    except Exception as e:
        return f"An error occurred during image generation: {e}"
async def evaluate_generated_image(prompt: str = Field(description="The original prompt used to generate the image")) -> str:
    """
    This tool is useful to evaluate a generated image.
    Args:
        prompt (str): The original prompt used to generate the image
    """
    # One user message carrying the saved image followed by the instructions.
    picture = ImageBlock(path=Path("output.png"))
    question = TextBlock(text=f"Could you (1) evaluate the faithfulness of the attached image to this prompt: '{prompt}', (2) evaluate the quality of the image and (3) produce a description of the image that is agnostic of the prompt that was used to generate it?")
    request = ChatMessage(role=MessageRole.USER, blocks=[picture, question])

    reply = await llm_struct.achat(messages=[request])
    # The first block's text is parsed as JSON and read by field name below.
    evaluation = json.loads(reply.message.blocks[0].text)
    print("Generated evaluation", flush=True)
    return f"The generated image can be described as:\n'''\n{evaluation['prompt_agnostic_description']}\n'''\nThe faithfulness of the generated image to the original prompt is: {evaluation['faithfulness']}%.\nThe quality of the image is {evaluation['quality']}."
+18
View File
@@ -0,0 +1,18 @@
from os import environ as ENV
from dotenv import load_dotenv
from typing import Tuple
def _require_env(name: str) -> str:
    """Return the non-empty value of environment variable ``name`` or raise ValueError."""
    value = ENV.get(name)
    if value is None:
        # Not exported in the process environment: give a .env file a chance
        # to provide it before giving up.
        load_dotenv()
        value = ENV.get(name)
    # An empty value is as unusable as a missing one.
    if not value:
        raise ValueError(f"There is no {name} declared among the environmental variables")
    return value


def get_api_keys() -> Tuple[str, str]:
    """
    Fetch the OpenAI and Google API keys from the environment.

    Each key is read from the process environment first; ``load_dotenv()`` is
    only called when the variable is not set at all.

    Returns:
        Tuple[str, str]: ``(openai_api_key, google_api_key)``.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` or ``GOOGLE_API_KEY`` is missing or empty.
    """
    openai_api_key = _require_env("OPENAI_API_KEY")
    google_api_key = _require_env("GOOGLE_API_KEY")
    return openai_api_key, google_api_key
+22
View File
@@ -0,0 +1,22 @@
from tools import generate_image, evaluate_generated_image
from llama_index.core.agent.workflow import AgentWorkflow, FunctionAgent
# Agent that generates an image, evaluates it, and retries with a refined
# prompt until the evaluation is satisfactory.
image_generation_agent = FunctionAgent(
    name="ImageGenerationAgent",
    description="An Agent suitable for internal feedback-driven generation of images",
    tools=[generate_image, evaluate_generated_image],
    # Adjacent string literals are joined verbatim, so every line ends with an
    # explicit "\n"; without it the numbered steps run together
    # ("...tool.2. Evaluate...").
    system_prompt=(
        "You are the ImageGenerationAgent. Your task is to generate images, evaluate them and, based on the feedback from the evaluation, re-generate them or return them to the user. Specifically, you need to follow these steps:\n"
        "1. Generate an image starting from the user's prompt with the 'generate_image' tool.\n"
        "2. Evaluate the generated image using the 'evaluate_generated_image' tool\n"
        "If you deem the evaluation positive:\n"
        "3. Return the image to the user, telling them what you generated\n"
        "Else:\n"
        "3. Refine the prompt for image generation, and go back to step 1\n"
        "Do not stop unless you generated an image that suits the original prompt from the user."
    ),
)
# Single-agent workflow: ImageGenerationAgent is the only agent and also the
# root agent. timeout=600 is presumably expressed in seconds — TODO confirm.
workflow = AgentWorkflow(agents=[image_generation_agent], root_agent=image_generation_agent.name, timeout=600)
Generated
+3673
View File
File diff suppressed because it is too large Load Diff