Smart AI Doorbell

Using Python, software was set up on a Raspberry Pi with a PiCamera that detects people and then sends a Slack message to the smartphone saying that somebody is at the door.

Description

To realize the project, TensorFlow, OpenCV, and all other required packages were installed on the Pi.

In the code itself, a continuous camera stream is implemented that passes each frame to a function containing the model, which produces predictions for the objects in the frame along with the corresponding bounding boxes. This data is then used to visualize the detections in the video feed.
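
A minimal sketch of this loop, simplified from the full listing below; run_inference is a hypothetical stand-in for the TensorFlow session call, and a generic webcam stream is used for illustration:

import cv2

def run_inference(frame):
    # Hypothetical stand-in for the sess.run(...) call in the full
    # listing below; returns bounding boxes, scores and class IDs.
    return [], [], []

cap = cv2.VideoCapture(0)  # generic webcam stream for illustration
while True:
    ok, frame = cap.read()                         # grab the next frame
    if not ok:
        break
    boxes, scores, classes = run_inference(frame)  # predict objects + boxes
    # ...draw the boxes on the frame for visualization...
    cv2.imshow('Object detector', frame)
    if cv2.waitKey(1) == ord('q'):                 # press 'q' to quit
        break
cap.release()
cv2.destroyAllWindows()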

If a person is detected in at least 5 consecutive frames with a confidence of more than 85%, the program sends a Slack message with the content "Somebody is at the door!" to a specific Slack channel. The Slacker library was used for this integration.
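
The Slacker call itself is a one-liner; the channel is the one used in the listing below, and the token is a placeholder:

from slacker import Slacker

slack = Slacker('YOUR_AUTHENTICATION_TOKEN')  # placeholder API token
# Post a plain-text message to a channel the bot has access to
slack.chat.post_message('#tuerklingel_unten', 'Somebody is at the door!')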

The program was also set up to send messages only between 6:30 and 19:00, since there is no daylight outside this window. In the future, a motion-activated light will be installed in the yard to enable use at night.
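
Because the timestamps are zero-padded HH:MM:SS strings, a plain lexicographic comparison implements the window check. A minimal sketch; note that the current time has to be read freshly at every check, not just once at startup:

from datetime import datetime

start_time = "06:30:00"
end_time = "19:00:00"

def within_daylight_window():
    # Zero-padded HH:MM:SS strings compare correctly as plain strings
    current_time = datetime.now().strftime("%H:%M:%S")
    return start_time < current_time < end_time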

Object Detection Model

For this project, the pre-trained "ssdlite_mobilenet_v2_coco_2018_05_09" model was used. It can simply be downloaded from the TensorFlow Model Zoo and already includes the class "person" as well as 89 other object classes.
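
A sketch of fetching and unpacking the model with Python's standard library; the URL is an assumption based on the Model Zoo's usual naming scheme and should be verified:

import tarfile
import urllib.request

MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
# Assumed download URL (TF1 Model Zoo naming scheme); verify before use
MODEL_URL = ('http://download.tensorflow.org/models/object_detection/'
             + MODEL_NAME + '.tar.gz')

urllib.request.urlretrieve(MODEL_URL, MODEL_NAME + '.tar.gz')
with tarfile.open(MODEL_NAME + '.tar.gz') as tar:
    tar.extractall()  # yields MODEL_NAME/frozen_inference_graph.pb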

Code

# Import packages
import os
import cv2
import numpy as np
from picamera.array import PiRGBArray
from picamera import PiCamera
import tensorflow as tf
import argparse
import sys
from datetime import datetime

# Slack client used for the doorbell notifications (insert your own token)
from slacker import Slacker
slack = Slacker('YOUR_AUTHENTICATION_TOKEN')

# Set up the notification time window. The current time is read freshly
# at each check inside person_detector, not just once at startup.
start_time = "06:30:00"
end_time = "19:00:00"

# Set up camera constants
IM_WIDTH = 1280
IM_HEIGHT = 720

# Select camera type (if user enters --usbcam when calling this script,
# a USB webcam will be used)
camera_type = 'picamera'
parser = argparse.ArgumentParser()
parser.add_argument('--usbcam', help='Use a USB webcam instead of picamera',
                    action='store_true')
args = parser.parse_args()
if args.usbcam:
    camera_type = 'usb'

#### Initialize TensorFlow model ####

# This is needed since the working directory is the object_detection folder.
sys.path.append('..')

# Import utilities
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Name of the directory containing the object detection module we're using
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'

# Grab path to current working directory
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'data','mscoco_label_map.pbtxt')

# Number of classes the object detector can identify
NUM_CLASSES = 90

## Load the label map.
# Label maps map indices to category names, so that when the convolutional
# network predicts `5`, we know that this corresponds to `airplane`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)


# Define input and output tensors (i.e. data) for the object detection classifier

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

#### Initialize other parameters ####

# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX

# Initialize control variables used for the person detector
detected = False
counter = 0
pause = 0
pause_counter = 0

#### Person detection function ####

# This function detects if a person is in the view
def person_detector(frame):

    # Use globals for the control variables so they retain their values
    # after the function exits
    global detected
    global counter
    global pause, pause_counter

    # Add a batch dimension: the model expects input of shape [1, H, W, 3]
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})

    # Draw the results of the detection (i.e. visualize the results)
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.40)
    
    # Check the class of the top detected object by looking at classes[0][0].
    # If the top detected object is a person (class 1) with a confidence
    # above 85% and detection is not paused, find its center coordinates
    # from boxes[0][0], which holds (ymin, xmin, ymax, xmax) in normalized
    # image coordinates.
    print(int(classes[0][0]))
    print(scores[0][0])
    if (int(classes[0][0]) == 1) and (pause == 0) and (scores[0][0] > 0.85):
        x = int(((boxes[0][0][1]+boxes[0][0][3])/2)*IM_WIDTH)
        y = int(((boxes[0][0][0]+boxes[0][0][2])/2)*IM_HEIGHT)
        print("Person detected")
        # Draw a circle at center of object
        cv2.circle(frame,(x,y), 5, (75,13,180), -1)

        # Increment the counter of frames in which a person has been detected
        counter += 1

    # If a person has been detected for more than 5 consecutive frames,
    # set the detected flag and send a Slack message to the phone.
    if counter > 5:
        detected = True
        # Only notify during daylight hours; read the time at send time
        current_time = datetime.now().strftime("%H:%M:%S")
        if (current_time > start_time) and (current_time < end_time):
            slack.chat.post_message('#tuerklingel_unten', "Somebody is at the door!")
        counter = 0
        # Pause person detection by setting the "pause" flag
        pause = 1

    # If pause flag is set, draw message on screen.
    if pause == 1:
        if detected == True:
            cv2.putText(frame,'JSomebody is at the door!',(int(IM_WIDTH*.1),int(IM_HEIGHT*.5)),font,3,(0,0,0),7,cv2.LINE_AA)
            cv2.putText(frame,'Somebody is at the door!',(int(IM_WIDTH*.1),int(IM_HEIGHT*.5)),font,3,(95,176,23),5,cv2.LINE_AA)

        # Increment pause counter until it reaches 30 (for a framerate of 1.5 FPS, this is about 20 seconds),
        # then unpause the application (set pause flag to 0).
        pause_counter = pause_counter + 1
        if pause_counter > 10:
            pause = 0
            pause_counter = 0
            detected = False
            # detected_outside = False

    return frame

#### Initialize camera and perform object detection ####

# The camera has to be set up and used differently depending on whether
# it's a Picamera or a USB webcam.

### Picamera ###
if camera_type == 'picamera':
    # Initialize Picamera and grab reference to the raw capture
    camera = PiCamera()
    camera.resolution = (IM_WIDTH,IM_HEIGHT)
    camera.framerate = 10
    rawCapture = PiRGBArray(camera, size=(IM_WIDTH,IM_HEIGHT))
    rawCapture.truncate(0)

    # Continuously capture frames and perform object detection on them
    for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):

        t1 = cv2.getTickCount()
        
        # Acquire the frame as a NumPy array and make a writable copy
        # (the batch dimension is added inside person_detector)
        frame = frame1.array
        frame = np.copy(frame)
        frame.setflags(write=1)

        # Pass frame into the person detection function
        frame = person_detector(frame)

        # Draw FPS
        cv2.putText(frame,"FPS: {0:.2f}".format(frame_rate_calc),(30,50),font,1,(255,255,0),2,cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # FPS calculation
        t2 = cv2.getTickCount()
        time1 = (t2-t1)/freq
        frame_rate_calc = 1/time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

        rawCapture.truncate(0)

    camera.close()

### USB webcam ###
    
elif camera_type == 'usb':
    # Initialize USB webcam feed
    camera = cv2.VideoCapture(0)
    ret = camera.set(cv2.CAP_PROP_FRAME_WIDTH, IM_WIDTH)
    ret = camera.set(cv2.CAP_PROP_FRAME_HEIGHT, IM_HEIGHT)

    # Continuously capture frames and perform object detection on them
    while(True):

        t1 = cv2.getTickCount()

        # Acquire the next frame from the webcam
        ret, frame = camera.read()

        # Pass frame into the person detection function
        frame = person_detector(frame)

        # Draw FPS
        cv2.putText(frame,"FPS: {0:.2f}".format(frame_rate_calc),(30,50),font,1,(255,255,0),2,cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # FPS calculation
        t2 = cv2.getTickCount()
        time1 = (t2-t1)/freq
        frame_rate_calc = 1/time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    camera.release()
        
cv2.destroyAllWindows()

Result

The visual output of the program and the notification on the smartphone look as follows: