Gemma 3 e Python: Crea un Organizzatore di Immagini

Pubblicato da Michele Saba



ATTENZIONE:

La versione scritta di questo video è ancora in fase di completamento. Scriverò nella DevelopersNewsletter non appena la guida sarà completata. Iscriviti alla newsletter, è gratis!

Di seguito il codice usato, così che possiate testarlo da subito:

import base64
import os
import shutil
from typing import Optional

import requests

# Lower-case filename suffixes that are treated as images to classify.
SUPPORTED_EXTENSIONS = (".jpg", ".jpeg")

# Chat-completions URL that classification requests are POSTed to.
GEMMA_API_ENDPOINT = "http://127.0.0.1:1234/v1/chat/completions"

# Instruction sent as the system message: the model must answer with exactly
# one of the two labels, PEOPLE or OTHER.
SYSTEM_PROMPT = " ".join(
    (
        "You will be given an image to analyze carefully.",
        "If it contains people or human faces reply PEOPLE, otherwise reply OTHER.",
        "You must not include anything in the response except for either PEOPLE "
        "or OTHER based on the given image.",
    )
)


def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Return the contents of the file at ``image_path`` as base64 text.

    Progress is reported with ``print``. Any exception raised while opening,
    reading or encoding the file is caught, reported with ``print``, and
    turned into a ``None`` return value.
    """
    print(f"Encoding image {image_path}...")
    try:
        with open(image_path, "rb") as source:
            raw_bytes = source.read()
            b64_bytes = base64.b64encode(raw_bytes)
            print("Encoding completed!")
            b64_text = b64_bytes.decode("utf-8")
    except Exception as error:
        print(f"Failed to encode image: {error}")
        return None
    return b64_text


def generate_encoded_image_url(base64_image: str) -> str:
    """Wrap base64 image text in a ``data:`` URL with the ``image/jpeg`` media type."""
    data_url = "data:image/jpeg;base64,{}".format(base64_image)
    return data_url


def call_gemma_api(image_url: str) -> Optional[str]:
    """Send one image to the chat-completions endpoint and return the reply text.

    The request carries ``SYSTEM_PROMPT`` as the system message and the image
    (as ``image_url``) as the user message, and is POSTed to
    ``GEMMA_API_ENDPOINT`` with a 120 second timeout.

    Returns the ``content`` of the first choice's ``message``. Returns
    ``None`` when the first choice has no ``message`` key, when ``requests``
    reports an error (including a non-success HTTP status), or when any other
    exception occurs; each of those cases is reported with ``print``.
    """
    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_PROMPT}],
    }
    user_message = {
        "role": "user",
        "content": [{"type": "image_url", "image_url": {"url": image_url}}],
    }
    request_body = {"messages": [system_message, user_message]}

    try:
        print(f"Sending request to {GEMMA_API_ENDPOINT}")
        reply = requests.post(
            GEMMA_API_ENDPOINT,
            headers={"Content-Type": "application/json"},
            json=request_body,
            timeout=120,
        )
        # Turn 4xx/5xx statuses into an exception handled below.
        reply.raise_for_status()
        data = reply.json()

        first_choice = data["choices"][0]
        if "message" not in first_choice:
            print("Error: Could not find expected text key in API response.")
            print("Response received:", data)
            return None
        return first_choice["message"]["content"]
    except requests.exceptions.RequestException as error:
        print(f"Error calling API: {error}")
        return None
    except Exception as error:
        # Covers unexpected response shapes such as a missing "choices" key.
        print(f"An unexpected error occurred during API call: {error}")
        return None


def process_folder(folder_path: str) -> None:
    """Classify each supported image in ``folder_path`` and move it to a sub-folder.

    ``PEOPLE`` and ``OTHER`` sub-folders are created inside ``folder_path`` if
    they do not exist. Every regular file directly inside ``folder_path``
    whose name ends with one of ``SUPPORTED_EXTENSIONS`` (case-insensitive)
    is base64-encoded, sent to ``call_gemma_api``, and moved into the
    sub-folder named by the reply. Files with other extensions, files that
    cannot be encoded, and files whose reply is neither label are left in
    place. Progress is reported with ``print``.

    The model's reply is normalized (surrounding whitespace removed,
    upper-cased) before being compared with the labels, so replies such as
    ``"PEOPLE\\n"`` or ``"people"`` are accepted.

    Args:
        folder_path: Directory to process. If it is not an existing
            directory an error is printed and nothing else happens.
    """
    if not os.path.isdir(folder_path):
        print(f"Error: Folder not found at {folder_path}")
        return

    people_folder = os.path.join(folder_path, "PEOPLE")
    other_folder = os.path.join(folder_path, "OTHER")
    os.makedirs(people_folder, exist_ok=True)
    os.makedirs(other_folder, exist_ok=True)
    # Maps each accepted label to the folder its images are moved into.
    target_folders = {"PEOPLE": people_folder, "OTHER": other_folder}

    print(f"Processing images in folder: {folder_path}")
    print(f"Using API endpoint: {GEMMA_API_ENDPOINT}")
    print("---")
    print()

    # Snapshot the listing up front: files are moved while we iterate.
    files_in_folder = [
        f
        for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    ]

    for filename in files_in_folder:
        if not filename.lower().endswith(SUPPORTED_EXTENSIONS):
            print(f"Unsupported filename found in folder: {filename}")
            print()
            continue

        image_path = os.path.join(folder_path, filename)

        base64_image = encode_image_to_base64(image_path=image_path)
        if base64_image:
            image_url = generate_encoded_image_url(base64_image=base64_image)
            interpretation = call_gemma_api(image_url)
            print(f"Gemma Interpretation: {interpretation}")

            # Models often add a trailing newline or change the case; compare
            # a normalized label instead of the raw reply.
            if isinstance(interpretation, str):
                label = interpretation.strip().upper()
            else:
                label = None

            if label not in target_folders:
                print(f"Unknown interpretation from Gemma: {interpretation}")
            else:
                print(f"Moving {filename} to the {label} folder")
                target_path = os.path.join(target_folders[label], filename)
                try:
                    shutil.move(image_path, target_path)
                except OSError as e:
                    # One file that cannot be moved must not abort the batch.
                    print(f"Failed to move {filename}: {e}")
                else:
                    print(f"Moved to: {target_path}")

        print("---")
        print()

    print("Finished processing folder.")


if __name__ == "__main__":
    # Banner: the title framed above and below by a rule of '#' of equal width.
    title = "Image Organizer Powered by Gemma 3."
    rule = "#" * len(title)
    print(rule)
    print(title)
    print(rule)
    print()

    # Ask for the folder, then underline the prompt plus whatever was typed.
    prompt = "Insert the folder path: "
    chosen_folder = input(prompt)
    print("-" * (len(prompt) + len(chosen_folder)))
    print()

    process_folder(folder_path=chosen_folder)

Vuoi imparare Python come un/a professionista? Dai uno sguardo ai nostri