Flask Integrated object tracking #145

Open · wants to merge 3 commits into master
241 changes: 241 additions & 0 deletions objec-tracker.py
@@ -0,0 +1,241 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import string
import random
import time
import tensorflow as tf

physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
import core.utils as utils
from tensorflow.python.saved_model import tag_constants
from core.config import cfg
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from flask import Flask, render_template, Response
# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


app = Flask(__name__)


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/video')
def video():
    # main() is a generator; each yielded chunk is one JPEG frame
    return Response(main(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
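# A note on the streaming mechanism: multipart/x-mixed-replace tells the
# browser to keep this response open and replace the displayed image each
# time a new part arrives. main() below yields one JPEG per frame, each
# delimited by the "frame" boundary declared above, so the <img> tag in
# index.html renders as live video. Any MJPEG-capable client should work,
# e.g. a browser pointed at http://<host>:2204/video (port taken from the
# app.run() call at the bottom of this file).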

def main():
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize the deep sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # cosine distance metric over the appearance features
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize the tracker
    tracker = Tracker(metric)
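    # Deep SORT associates new detections to existing tracks by comparing
    # appearance embeddings (produced by the mars-small128 encoder above)
    # under the cosine metric: candidate matches costing more than
    # max_cosine_distance are rejected, and nn_budget caps how many past
    # embeddings are kept per track (None means unlimited).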

    # load configuration for the object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    input_size = 416

    saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-tiny-416', tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
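    # 'serving_default' is the signature exported when the YOLOv4-tiny
    # checkpoint was saved: a concrete function mapping a float32
    # [1, input_size, input_size, 3] batch to raw predictions, where the
    # first four channels of each row are box coordinates and the rest are
    # per-class confidences (this is how the loop below unpacks it).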

    # begin video capture: webcam 0 by default, with a video-file fallback.
    # NOTE: the original referenced an undefined video_path here; the path
    # below is a placeholder, so point it at a real file if no camera is
    # attached.
    video_path = './data/video/test.mp4'
    vid = cv2.VideoCapture(0)
    if not vid.isOpened():
        vid = cv2.VideoCapture(video_path)

    # generate a random output file name, e.g. ./outputs/abcde.avi
    letters = string.ascii_lowercase
    sgf = ''.join(random.choice(letters) for i in range(5))
    video_file = "./outputs/" + sgf + ".avi"
    # VideoCapture reports dimensions as floats; cast to int for the writer
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_file, codec, 30, (width, height))
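    # The writer's frame rate is hard-coded to 30, so if the source runs at
    # a different rate the saved file will play too fast or too slow. One
    # option is to read the capture's own rate instead, for example:
    #   fps_in = vid.get(cv2.CAP_PROP_FPS) or 30
    #   out = cv2.VideoWriter(video_file, codec, fps_in, (width, height))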

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=0.45,
            score_threshold=0.50
        )
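        # combined_non_max_suppression expects boxes shaped
        # [batch, num_boxes, q, 4]; with q = 1 every class shares the same
        # box coordinates, which is why the reshape above inserts a
        # singleton axis rather than one box per class.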

        # convert tensors to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # convert boxes from normalized (ymin, xmin, ymax, xmax) to pixel
        # (xmin, ymin, width, height) for the detections
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # read in all class names from the detector config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # only these classes are passed to the tracker
        allowed_classes = ['person', 'elephant', 'bear', 'zebra', 'giraffe']

        # map class indices to names, keeping only allowed classes
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed them to the tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(bboxes, scores, names, features)]

        # initialize the color map for per-track colors
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression over the remaining detections
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)
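        # predict() advances each track's Kalman-filter state by one frame;
        # update() then matches the new detections to tracks using the
        # appearance features gated by the cosine metric configured earlier,
        # refreshes matched tracks, and initiates or deletes tracks as needed.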

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw the bbox and an id label on the frame
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # print details about each track
            print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                str(track.track_id), class_name,
                (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # frames per second of the running detection loop
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # encode the annotated frame as JPEG and yield it as one part of the
        # multipart stream consumed by the /video route
        ret, buffer = cv2.imencode('.jpg', result)
        pic = buffer.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + pic + b'\r\n\r\n')
        # also save the frame to the output video file
        out.write(result)

    vid.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    # app.run() blocks until the server exits; main() is never called
    # directly, since the /video route drives it as a generator
    app.run(host='0.0.0.0', port=2204, threaded=True, debug=True)




1 change: 1 addition & 0 deletions requirements.txt
@@ -6,3 +6,4 @@ absl-py
easydict
matplotlib
pillow
Flask
40 changes: 40 additions & 0 deletions templates/index.html
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Object-Tracking</title>
    <style>
        h1 {
            text-align: center;
            color: red;
            border: 5px solid orange;
        }

        .title {
            padding-left: 280px;
            padding-right: 280px;
        }
    </style>
</head>

<body>
    <div class="title">
        <h1>This is the live stream</h1>
    </div>
    <div class="video">
        <!-- the browser keeps this request open and swaps in each JPEG part
             of the multipart stream, so the image plays as live video -->
        <img src="{{ url_for('video') }}" width="40%">
    </div>
</body>

</html>