Gemma 3 e Python: Crea un Organizzatore di Immagini
Pubblicato da Michele Saba
ATTENZIONE:
La versione scritta di questo video è ancora in fase di completamento. Scriverò nella DevelopersNewsletter non appena la guida sarà completata. Iscriviti alla newsletter, è gratis!
Di seguito il codice usato, così che possiate testarlo da subito:
import base64
import os
import shutil
from typing import Optional
import requests
# File-name suffixes accepted by process_folder; kept as a tuple because it
# is passed directly to str.endswith().
SUPPORTED_EXTENSIONS = (".jpg", ".jpeg")

# Chat-completions URL that every classification request is POSTed to.
GEMMA_API_ENDPOINT = "http://127.0.0.1:1234/v1/chat/completions"

# System instruction sent with every image: the model must answer with the
# single word PEOPLE or OTHER and nothing else.
SYSTEM_PROMPT = (
    "You will be given an image to analyze carefully. "
    "If it contains people or human faces reply PEOPLE, otherwise reply OTHER. "
    "You must not include anything in the response "
    "except for either PEOPLE or OTHER based on the given image."
)
def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Read the file at *image_path* and return its content as base64 text.

    Progress messages are printed to stdout. Any exception raised while
    reading or encoding is reported on stdout and turned into a ``None``
    return value, so the caller can skip the file.
    """
    print(f"Encoding image {image_path}...")
    try:
        with open(image_path, "rb") as source:
            raw_bytes = source.read()
        encoded_bytes = base64.b64encode(raw_bytes)
        print("Encoding completed!")
        return encoded_bytes.decode("utf-8")
    except Exception as error:
        print(f"Failed to encode image: {error}")
        return None
def generate_encoded_image_url(
    base64_image: str, mime_type: str = "image/jpeg"
) -> str:
    """Wrap base64 image data in a ``data:`` URL.

    Args:
        base64_image: The image content, already base64-encoded as text.
        mime_type: Media type advertised in the URL. Defaults to
            ``"image/jpeg"``, which is what the function always produced
            before this parameter existed.

    Returns:
        A string of the form ``data:<mime_type>;base64,<base64_image>``.
    """
    return f"data:{mime_type};base64,{base64_image}"
def _extract_message_content(result: object) -> Optional[str]:
    """Return ``choices[0].message.content`` from a decoded response, or None.

    Every level is type-checked so that a response with a missing key, an
    empty ``choices`` list or a non-string content yields ``None`` instead
    of raising.
    """
    if not isinstance(result, dict):
        return None
    choices = result.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return None
    message = first_choice.get("message")
    if not isinstance(message, dict):
        return None
    content = message.get("content")
    return content if isinstance(content, str) else None


def call_gemma_api(image_url: str) -> Optional[str]:
    """Send one image to the chat-completions endpoint and return the reply.

    The request carries SYSTEM_PROMPT as the system message and *image_url*
    as the only user content, and is POSTed to GEMMA_API_ENDPOINT with a
    120 second timeout.

    Args:
        image_url: URL of the image to classify (the caller passes a
            base64 ``data:`` URL).

    Returns:
        The text of the first choice's message exactly as the server sent
        it, or ``None`` when the request fails, the body is not valid JSON,
        or the response does not have the expected shape. Failures are
        reported on stdout; this function does not raise for them.
    """
    headers = {"Content-Type": "application/json"}
    messages = [
        {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]},
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": image_url}}],
        },
    ]
    payload = {"messages": messages}

    try:
        print(f"Sending request to {GEMMA_API_ENDPOINT}")
        response = requests.post(
            GEMMA_API_ENDPOINT, headers=headers, json=payload, timeout=120
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error calling API: {e}")
        return None
    except ValueError as e:
        # Depending on the installed requests version, an invalid JSON body
        # surfaces either as a RequestException subclass (handled above) or
        # as a plain ValueError.
        print(f"An unexpected error occurred during API call: {e}")
        return None

    content = _extract_message_content(result)
    if content is None:
        # Dump the payload so a malformed response can be diagnosed.
        print("Error: Could not find expected text key in API response.")
        print("Response received:", result)
    return content
def _unique_destination(folder: str, filename: str) -> str:
    """Return a path for *filename* inside *folder* that does not exist yet.

    When the plain name is taken, ``_1``, ``_2``, ... is inserted before the
    extension until a free name is found.
    """
    stem, extension = os.path.splitext(filename)
    candidate = os.path.join(folder, filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f"{stem}_{counter}{extension}")
        counter += 1
    return candidate


def _classify_and_move(image_path: str, filename: str, folders_by_label: dict) -> None:
    """Classify one image through the API and move it to the matching folder."""
    base64_image = encode_image_to_base64(image_path=image_path)
    if not base64_image:
        return

    image_url = generate_encoded_image_url(base64_image=base64_image)
    interpretation = call_gemma_api(image_url)
    print(f"Gemma Interpretation: {interpretation}")

    # The reply is free text from a model: tolerate surrounding whitespace
    # and case differences before comparing it with the known labels.
    label = interpretation.strip().upper() if isinstance(interpretation, str) else None
    if label not in folders_by_label:
        print(f"Unknown interpretation from Gemma: {interpretation}")
        return

    print(f"Moving {filename} to the {label} folder")
    # shutil.move may replace an existing file with the same name, so pick a
    # free name instead of risking the loss of a previously sorted image.
    target_path = _unique_destination(folders_by_label[label], filename)
    try:
        shutil.move(image_path, target_path)
    except OSError as e:
        # One file that cannot be moved must not abort the whole batch.
        print(f"Failed to move {filename}: {e}")
        return
    print(f"Moved to: {target_path}")


def process_folder(folder_path: str) -> None:
    """Sort the JPEG images directly inside *folder_path* into two subfolders.

    ``PEOPLE`` and ``OTHER`` subfolders are created inside *folder_path* if
    missing. Every regular file whose name ends with one of
    SUPPORTED_EXTENSIONS (case-insensitive) is encoded, sent to the API and
    moved into the subfolder named by the reply. Files with another
    extension, files that cannot be encoded, and files for which the reply
    is neither PEOPLE nor OTHER are left where they are.

    Args:
        folder_path: Directory to process. If it is not an existing
            directory an error is printed and nothing else happens.

    Returns:
        None. Progress and errors are reported on stdout; a ``---``
        separator is printed after each supported image.
    """
    if not os.path.isdir(folder_path):
        print(f"Error: Folder not found at {folder_path}")
        return

    people_folder = os.path.join(folder_path, "PEOPLE")
    other_folder = os.path.join(folder_path, "OTHER")
    os.makedirs(people_folder, exist_ok=True)
    os.makedirs(other_folder, exist_ok=True)
    folders_by_label = {"PEOPLE": people_folder, "OTHER": other_folder}

    print(f"Processing images in folder: {folder_path}")
    print(f"Using API endpoint: {GEMMA_API_ENDPOINT}")
    print("---")
    print()

    # Snapshot the listing up front: files are moved while we iterate.
    files_in_folder = [
        f
        for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    ]

    for filename in files_in_folder:
        if not filename.lower().endswith(SUPPORTED_EXTENSIONS):
            print(f"Unsupported filename found in folder: {filename}")
            print()
            continue

        image_path = os.path.join(folder_path, filename)
        _classify_and_move(image_path, filename, folders_by_label)
        print("---")
        print()

    print("Finished processing folder.")
if __name__ == "__main__":
    # Banner: the title framed above and below by a rule of '#' characters
    # of the same width, followed by an empty line.
    title = "Image Organizer Powered by Gemma 3."
    rule = "#" * len(title)
    print(rule, title, rule, "", sep="\n")

    # Ask for the folder, then underline the prompt plus the typed answer.
    prompt = "Insert the folder path: "
    chosen_folder = input(prompt)
    print("-" * (len(prompt) + len(chosen_folder)))
    print()

    process_folder(folder_path=chosen_folder)