Python Scripts for Image Detection
From ZoneMinder Wiki
Jump to navigation · Jump to search
These are alternatives to ZMES.
zmai.py
A copy of https://gist.github.com/mdetweiler/e987d56df9a63290b562e6e7b00489c6 in case it goes offline.
#!/home/zmadmin/zmai/venv/bin/python3
# The path above is to the python virtual environment that can be created
#
# zmai.py by Michael Detweiler
#
# Script to prompt the Google Gemini Flash multi-modal image analysis AI model
# and extract specified information from images.
#
# For ZoneMinder motion-detected image analysis.
# Can be used to gather any (prompted) specified details from images that cause an alert to fire.
# For vehicle information and direction tracking in ZoneMinder monitor (ZM 1.37 and later):
# Just add this script to your "Recording" option "Event Start Command" in your monitor config "Recording" section.
# Specify the full path to the script... /home/zmadmin/zmai/zmai.py
# Create one "Active" zone and a post event buffer of a few seconds or more as needed.
#
# Note: You can use the output of the analysis to create alert audio over a speaker system
# with TalkKonnect or Sonos.
#
# Create a directory in /home/<user>/ for the script
# mkdir zmai
# chown www-data zmai
# chgrp www-data zmai
# On Ubuntu... chmod 755 /home/<user>
# cd zmai/
# Create the script and make sure permissions and ownership are set
# chmod 755 zmai.py
# chown www-data zmai.py
# chgrp www-data zmai.py
# sudo apt install python3.10-venv
# python3 -m venv venv
# source venv/bin/activate
# In the virtual env...
# pip3 install google-genai
# pip3 install pillow
# pip3 install mysql-connector-python
# deactivate ... when done to exit the venv
#
# Test with a previously recorded event for that monitor:
# sudo su -s /bin/bash -c "/home/zmadmin/zmai/zmai.py 90608 1" www-data
from datetime import datetime
import subprocess
import sys
import os
import mysql.connector
import logging
from time import sleep
# Image analysis imports
from PIL import Image
import io
from google import genai
from google.genai import types
import json
import re

# Log to /tmp so the www-data user (which ZoneMinder runs this under) can append.
logging.basicConfig(
    filename='/tmp/zmai.log',
    format='%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')

# Event frames live at <base>/<MonitorId>/<YYYY-MM-DD>/<EventId>/NNNNN-capture.jpg
# e.g. /var/cache/zoneminder/events/1/2025-08-15/90640/00001-capture.jpg
base_img_path = '/var/cache/zoneminder/events/'

# Initialize the Gemini client.
# SECURITY: never commit a real API key to source control. Prefer the
# GEMINI_API_KEY environment variable; the literal fallback is kept only for
# backward compatibility and the exposed key should be rotated.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY",
                           "AIzaSyBtH7GaCKhHlfnF7C1dz2NjxKZz2acyws8"))
def main():
    """Analyze the best-scoring frame of a ZoneMinder event with Gemini.

    Command-line arguments (supplied by ZoneMinder's "Event Start Command"):
        argv[1] -- EventId
        argv[2] -- MonitorId

    Side effects: reads the ZoneMinder MySQL database, appends to the log
    file, and submits the selected frame image to the Gemini API.
    """
    logging.info("----------START EVENT----------")
    if len(sys.argv) < 3:
        # Fail fast with a usable message instead of an IndexError.
        logging.error("Usage: zmai.py <EventId> <MonitorId>")
        return
    event_id = sys.argv[1]
    monitor_id = sys.argv[2]
    logging.info("eventId = {0}".format(event_id))
    logging.info("monitorId = {0}".format(monitor_id))
    mydb = mysql.connector.connect(
        host="localhost",
        user="zmuser",
        password="zmpass",
        database="zm"
    )
    row = None
    try:
        with mydb.cursor() as cursor:
            # Parameterized queries: never interpolate CLI arguments into SQL.
            try:
                cursor.execute("SELECT Name FROM Monitors WHERE Id = %s",
                               (monitor_id,))
                monitor_name = cursor.fetchone()[0]
                logging.info("monitor_name = {0}".format(monitor_name))
            except Exception as e:
                logging.error("Error: {0}".format(e))
            # Pick the highest-scoring frame recorded so far and hope it is
            # a good one.
            try:
                cursor.execute(
                    "SELECT TimeStamp, FrameId FROM Frames "
                    "WHERE EventId = %s ORDER BY Score DESC LIMIT 1",
                    (event_id,))
                row = cursor.fetchone()
            except Exception as e:
                logging.error("Error: {0}".format(e))
    finally:
        # Always release the DB connection, even when a query failed.
        mydb.close()
    if row is None:
        # Without a frame row there is nothing to analyze (the original
        # code raised NameError here on a failed query).
        logging.error("No frames found for event {0}".format(event_id))
        return
    datestamp_obj, frame_id = row
    datestamp = datestamp_obj.strftime("%Y-%m-%d")
    logging.info("**** Datestamp = {0} ****".format(datestamp))
    logging.info("**** FrameId = {0} ****".format(frame_id))
    # Frame file names are zero-padded to 5 digits, e.g. 00001-capture.jpg.
    padded_number = str(frame_id).zfill(5)
    ipath = (base_img_path + monitor_id + '/' + datestamp + '/'
             + event_id + '/' + padded_number + '-capture.jpg')
    logging.info(ipath)
    # Submit the image path to the LLM for analysis.
    img_data = analize_img(ipath)
    # Use the image data to create audio or other types of alerts!
    logging.info(img_data)
    logging.info("*----------END EVENT----------*")
def _extract_json(text):
    """Return the JSON object embedded in *text*, stripping any markdown
    ```json fencing the model may have wrapped around it; falls back to the
    raw text when no fenced object is found."""
    match = re.search(r".*json\s*(\{.*\})\s*.*", text, re.DOTALL)
    return match.group(1) if match else text


def _summarize(jdata):
    """Render the parsed ``{"results": [...]}`` dict into sentence text
    suitable for a spoken alert. Unknown subjects are skipped."""
    parts = []
    for result in jdata["results"]:
        subject = result["subject"]
        if subject == "vehicle":
            orientation = result["orientation"].lower()
            # Camera-specific convention: vehicle showing its right side is
            # departing, its left side is arriving.
            if "right" in orientation:
                direction = "departing"
            elif "left" in orientation:
                direction = "arriving"
            else:
                direction = "Unknown"
            parts.append(f'There is a {result["color"]} {result["year"]} {result["make"]} {result["model"]} {result["type"]} {direction}. ')
        elif subject == "human":
            parts.append(f'There is {result["count"]} {result["subject"]} that looks {result["size"]} detected. ')
        elif subject == "animal":
            # BUG FIX: the original tested jdata["subject"], a key that never
            # exists at the top level, so every animal detection raised
            # KeyError and the function returned None.
            parts.append(f'There is {result["count"]} {result["subject"]} that looks like a {result["species"]} detected. ')
    return "".join(parts)


def analize_img(image_path):
    """Submit the JPEG at *image_path* to Gemini and parse its JSON reply.

    Returns the parsed dict (``{"results": [...]}``) on success, or None
    when the model's reply cannot be parsed as JSON.
    """
    # Load the image bytes; the file handle is closed by the context manager.
    with open(image_path, 'rb') as f:
        image_bytes = f.read()
    # NOTE: the original used '''' (triple quote + stray apostrophe), which
    # prepended a spurious ' to the prompt; fixed here.
    prompt = '''Describe the content of this image as only the fields described in the following text.
Use json output where all results are provided in a list of results like {"results": [...]}.
If there is a human, output the number of humans and the body size as adult or child in json
format as {"subject":"human","count":"xxx","size":"xxx"}.
If there is an animal, output the number of animals and the species in json
format as {"subject":"animal","count":"xxx","species":"xxx"}.
If there is a vehicle, output the make, model, year, color, vehicle's most prominant orientation in the image
view as left side or right side,
type as car, suv, truck (Amazon, Fedex, UPS or the lettering on it if it forms words)
and include the license plate numbers and letters if visible in json
format as {"subject":"vehicle","make":"xxx","model":"xxx","year":"xxx","color":"xxx","orientation":"xxx","type":"xxx","license":"xxx"}'''
    response = client.models.generate_content(
        model="gemini-2.5-flash",  # Or your chosen model
        contents=[
            prompt,
            types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
        ],
        #config=types.GenerateContentConfig(
        #   thinking_config=types.ThinkingConfig(thinking_budget=0) # Disables thinking for speed
        #),
    )
    logging.info("Model's response")
    logging.info(response.text)
    json_string = _extract_json(response.text)
    try:
        jdata = json.loads(json_string)
        logging.info(_summarize(jdata))
        return jdata
    except Exception as e:
        logging.error("Error: {0}".format(e))
# Run only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()