Yolov8 support (#18)

* Delete yolov5s-int8-192.tflite * Delete yolov5s-int8-192_edgetpu.tflite * Add files via upload * Add files via upload * Update detect.py Added support for full integer models * Delete detect_int8.py * Update detect.py * Update edgetpumodel.py * Delete yolov8n_full_integer_quant_edgetpu_160_ETPU.log * Delete yolov8n_full_integer_quant_edgetpu_128_ETPU.log * Delete yolov8n_full_integer_quant_edgetpu_128_ETPU.tflite * Delete yolov8n_full_integer_quant_edgetpu_160_ETPU.tflite * Delete yolov8s_full_integer_quant_edgetpu_128_ETPU.log * Delete yolov8s_full_integer_quant_edgetpu_128_ETPU.tflite * Delete yolov8s_full_integer_quant_edgetpu_160_ETPU.log * Delete yolov8s_full_integer_quant_edgetpu_160_ETPU.tflite * Add files via upload * Add files via upload * Delete yolov8s_integer_quant_edgetpu_160_CPU.tflite * Delete yolov8s_integer_quant_edgetpu_160_CPU.log * Delete yolov5s-int8_edgetpu_128_ETPU.log * Delete yolov5s-int8_edgetpu_128_ETPU.tflite * Delete yolov5n-int8_edgetpu_128_ETPU.log * Delete yolov5n-int8_edgetpu_128_ETPU.tflite * Delete yolov8n_full_integer_quant_edgetpu_160_ETPU.log * Delete yolov8n_full_integer_quant_edgetpu_160_ETPU.tflite * Delete yolov8s_full_integer_quant_edgetpu_160_ETPU.log * Delete yolov8s_full_integer_quant_edgetpu_160_ETPU.tflite * Delete yolov5s-int8_edgetpu_160_ETPU.tflite * Delete yolov5n-int8_edgetpu_160_ETPU.tflite * Delete yolov5s-int8_edgetpu_160_ETPU.log * Delete yolov5n-int8_edgetpu_160_ETPU.log * Add files via upload * Update edgetpumodel.py * Update detect.py * Update edgetpumodel.py * Update detect.py * Update edgetpumodel.py * Update nms.py * Added v8 parameter to edgetpumodel.py and nms.py * Added v8 parameter to edgetpumodel.py and nms.py * moved v8 nms to dedicated function for better readability, adapted v8 nms logic to match code used in https://github.com/ultralytics/ultralytics.git/examples/YOLOv8-ONNXRuntime/main.py * moved v8 nms to dedicated function for better readability, adapted v8 nms logic to match code used in 
https://github.com/ultralytics/ultralytics.git/examples/YOLOv8-ONNXRuntime/main.py * corrected calculation of xc parameter * corrected calculation of xc parameter * corrected calculation of xc parameter * corrected calculation of xc parameter * cleanup * Added code for webstreaming of resulting frame * Added code for webstreaming of resulting frame * Re-added original models * cleanup * Update webstreaming.py * removed webstreaming support for clean v8 support PR
jveitchmichaelis · Apr 16, 2024 · 283644d · 283644d
1 parent 422d209
commit 283644d
Show file tree

Hide file tree

Showing 15 changed files with 238 additions and 17 deletions.
diff --git a/detect.py b/detect.py
@@ -33,6 +33,7 @@
     parser.add_argument("--bench_coco", action='store_true', help="Process a stream")
     parser.add_argument("--coco_path", type=str, help="Path to COCO 2017 Val folder")
     parser.add_argument("--quiet","-q", action='store_true', help="Disable logging (except errors)")
+    parser.add_argument("--v8", action='store_true', help="yolov8 model?")
 
     args = parser.parse_args()
 
@@ -44,10 +45,13 @@
         logger.error("Please select either an input image or a stream")
         exit(1)
 
-    model = EdgeTPUModel(args.model, args.names, conf_thresh=args.conf_thresh, iou_thresh=args.iou_thresh)
+    model = EdgeTPUModel(args.model, args.names, conf_thresh=args.conf_thresh, iou_thresh=args.iou_thresh, v8=args.v8)
     input_size = model.get_image_size()
 
-    x = (255*np.random.random((3,*input_size))).astype(np.uint8)
+    if args.v8:
+        x = (255*np.random.random((3,*input_size))).astype(np.int8)
+    else:
+        x = (255*np.random.random((3,*input_size))).astype(np.uint8)
     model.forward(x)
 
     conf_thresh = 0.25

diff --git a/edgetpumodel.py b/edgetpumodel.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pycoral.utils.edgetpu as etpu
 from pycoral.adapters import common
-from nms import non_max_suppression
+from nms import non_max_suppression, non_max_suppresion_v8
 import cv2
 import json
 
@@ -18,7 +18,7 @@
 
 class EdgeTPUModel:
 
-    def __init__(self, model_file, names_file, conf_thresh=0.25, iou_thresh=0.45, filter_classes=None, agnostic_nms=False, max_det=1000):
+    def __init__(self, model_file, names_file, conf_thresh=0.25, iou_thresh=0.45, filter_classes=None, agnostic_nms=False, max_det=1000, v8=False):
         """
         Creates an object for running a Yolov5 model on an EdgeTPU
         
@@ -42,7 +42,8 @@ def __init__(self, model_file, names_file, conf_thresh=0.25, iou_thresh=0.45, fi
         self.iou_thresh = iou_thresh
         self.filter_classes = filter_classes
         self.agnostic_nms = agnostic_nms
-        self.max_det = 1000
+        self.max_det = max_det
+        self.v8 = v8
 
         logger.info("Confidence threshold: {}".format(conf_thresh))
         logger.info("IOU threshold: {}".format(iou_thresh))
@@ -116,9 +117,8 @@ def get_image_size(self):
             logger.debug("Expecting input shape: {}".format(self.input_size))
             return self.input_size
         else:
-            logger.warn("Interpreter is not yet loaded")
-
-
+            logger.warning("Interpreter is not yet loaded")
+
     def predict(self, image_path, save_img=True, save_txt=True):
         logger.info("Attempting to load {}".format(image_path))
 
@@ -131,9 +131,7 @@ def predict(self, image_path, save_img=True, save_txt=True):
         det = self.process_predictions(pred[0], full_image, pad, output_path, save_img=save_img, save_txt=save_txt)
 
         return det
-
-
-
+
     def forward(self, x:np.ndarray, with_nms=True) -> np.ndarray:
         """
         Predict function using the EdgeTPU
@@ -149,25 +147,36 @@ def forward(self, x:np.ndarray, with_nms=True) -> np.ndarray:
         tstart = time.time()
         # Transpose if C, H, W
         if x.shape[0] == 3:
-          x = x.transpose((1,2,0))
+          x = x.transpose((1, 2, 0))
 
         x = x.astype('float32')
-
+        
         # Scale input, conversion is: real = (int_8 - zero)*scale
         x = (x/self.input_scale) + self.input_zero
-        x = x[np.newaxis].astype(np.uint8)
+        if self.v8:
+            x = x[np.newaxis].astype(np.int8)
+        else:
+            x = x[np.newaxis].astype(np.uint8)
 
         self.interpreter.set_tensor(self.input_details[0]['index'], x)
         self.interpreter.invoke()
 
         # Scale output
         result = (common.output_tensor(self.interpreter, 0).astype('float32') - self.output_zero) * self.output_scale
+        if self.v8:
+            result = np.transpose(result, [0, 2, 1])  # tranpose for yolov8 models
+
         self.inference_time = time.time() - tstart
 
         if with_nms:
 
             tstart = time.time()
-            nms_result = non_max_suppression(result, self.conf_thresh, self.iou_thresh, self.filter_classes, self.agnostic_nms, max_det=self.max_det)
+            if self.v8:
+                nms_result = non_max_suppresion_v8(result, self.conf_thresh, self.iou_thresh, self.filter_classes,
+                                                   self.agnostic_nms, max_det=self.max_det)
+            else:
+                nms_result = non_max_suppression(result, self.conf_thresh, self.iou_thresh, self.filter_classes,
+                                                 self.agnostic_nms, max_det=self.max_det)
             self.nms_time = time.time() - tstart
 
             return nms_result
@@ -230,7 +239,7 @@ def process_predictions(self, det, output_image, pad, output_path="detection.jpg
         if len(det):
             # Rescale boxes from img_size to im0 size
             # x1, y1, x2, y2=
-            det[:, :4] = self.get_scaled_coords(det[:,:4], output_image, pad)
+            det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad)
             output = {}
             base, ext = os.path.splitext(output_path)
 
@@ -267,4 +276,4 @@ def process_predictions(self, det, output_image, pad, output_path="detection.jpg
             if save_img:
               cv2.imwrite(output_path, output_image)
 
-        return det
+        return det
diff --git a/nms.py b/nms.py
@@ -48,6 +48,100 @@ def nms(dets, scores, thresh):
 
     return np.array(keep)
 
+def box_iou(box_i,boxes):  # stub for the merge-NMS IoU matrix; always raises
+    raise NotImplementedError('merged iou calculation not implemented')
+
+def non_max_suppresion_v8(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+                        labels=(), max_det=300):
+    """Run NumPy non-maximum suppression on (already transposed) YOLOv8 output.
+
+    Rows of ``prediction`` are read as ``[cx, cy, w, h, score_0, ..., score_{nc-1}]``; there is
+    no objectness column, so the best class score is used as the detection confidence.
+
+    Args:
+        prediction: array of shape (batch, boxes, 4 + nc).
+        conf_thres: minimum class score, in [0, 1], for a box to be kept.
+        iou_thres: IoU threshold, in [0, 1], passed to ``nms``.
+        classes: optional iterable of class indices; other classes are dropped.
+        agnostic: kept for signature parity with ``non_max_suppression``; it has
+            no effect because suppression here is always done across classes.
+        multi_label: if True (and nc > 1), emit one detection per (box, class)
+            pair above ``conf_thres`` instead of the best class only.
+        labels: optional per-image arrays of apriori labels whose rows are read
+            as (class, cx, cy, w, h); they are appended as score-1.0 candidates.
+        max_det: maximum number of detections kept per image.
+
+    Returns:
+        A list with one (n, 6) array per image: x1, y1, x2, y2, conf, class.
+
+    Raises:
+        AssertionError: if ``conf_thres`` or ``iou_thres`` is outside [0, 1].
+    """
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+
+    nc = prediction.shape[2] - 4  # number of classes
+    xc = np.amax(prediction[..., 4:], axis=2) > conf_thres  # candidates
+
+    # Settings
+    max_nms = 30000  # maximum number of boxes passed to nms()
+    time_limit = 10.0  # seconds to quit after
+    multi_label &= nc > 1  # multiple labels per box
+
+    t = time.time()
+    # One independent empty result per image (not N references to one array)
+    output = [np.zeros((0, 6)) for _ in range(prediction.shape[0])]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = np.asarray(labels[xi])
+            v = np.zeros((len(lb), nc + 4))
+            v[:, :4] = lb[:, 1:5]  # box: four columns (cx, cy, w, h)
+            # v8 rows have no objectness column (column 4 is class 0): set only the class
+            v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0  # cls
+            x = np.concatenate((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            # ndarray.nonzero() has no as_tuple argument (torch API); use np.nonzero
+            i, j = np.nonzero(x[:, 4:] > conf_thres)
+            x = np.concatenate((box[i], x[i, j + 4, None], j[:, None].astype(float)), axis=1)
+        else:  # best class only
+            conf = np.amax(x[:, 4:], axis=1, keepdims=True)
+            j = np.argmax(x[:, 4:], axis=1).reshape(conf.shape)
+            x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.flatten() > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[np.isin(x[:, 5], np.asarray(classes))]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[np.argsort(-x[:, 4])[:max_nms]]  # sort by confidence, descending
+
+        # NMS over all classes at once (no per-class box offset is applied)
+        i = nms(x[:, :4], x[:, 4], iou_thres)[:max_det]  # limit detections
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f'WARNING: NMS time limit {time_limit}s exceeded')
+            break  # time limit exceeded
+
+    return output
+
 
 def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                         labels=(), max_det=300):

diff --git a/yolov5n-int8_edgetpu_160.log b/yolov5n-int8_edgetpu_160.log
@@ -0,0 +1,17 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov5n-int8.tflite
+Output: yolov5n-int8_edgetpu.tflite
+
+Operator                       Count      Status
+
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+STRIDED_SLICE                  9          Mapped to Edge TPU
+CONV_2D                        60         Mapped to Edge TPU
+QUANTIZE                       7          Mapped to Edge TPU
+MUL                            75         Mapped to Edge TPU
+MAX_POOL_2D                    3          Mapped to Edge TPU
+ADD                            10         Mapped to Edge TPU
+RESHAPE                        6          Mapped to Edge TPU
+PAD                            7          Mapped to Edge TPU
+CONCATENATION                  17         Mapped to Edge TPU
+LOGISTIC                       66         Mapped to Edge TPU
diff --git a/yolov5n-int8_edgetpu_160.tflite b/yolov5n-int8_edgetpu_160.tflite
diff --git a/yolov5s-int8_edgetpu_160.log b/yolov5s-int8_edgetpu_160.log
@@ -0,0 +1,17 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov5s-int8.tflite
+Output: yolov5s-int8_edgetpu.tflite
+
+Operator                       Count      Status
+
+PAD                            7          Mapped to Edge TPU
+CONCATENATION                  17         Mapped to Edge TPU
+MAX_POOL_2D                    3          Mapped to Edge TPU
+LOGISTIC                       66         Mapped to Edge TPU
+ADD                            10         Mapped to Edge TPU
+CONV_2D                        60         Mapped to Edge TPU
+MUL                            75         Mapped to Edge TPU
+QUANTIZE                       7          Mapped to Edge TPU
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+STRIDED_SLICE                  9          Mapped to Edge TPU
+RESHAPE                        6          Mapped to Edge TPU
diff --git a/yolov5s-int8_edgetpu_160.tflite b/yolov5s-int8_edgetpu_160.tflite
diff --git a/yolov8n_full_integer_quant_edgetpu_160.log b/yolov8n_full_integer_quant_edgetpu_160.log
@@ -0,0 +1,20 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov8n_saved_model/yolov8n_full_integer_quant.tflite
+Output: yolov8n_saved_model/yolov8n_full_integer_quant_edgetpu.tflite
+
+Operator                       Count      Status
+
+MAX_POOL_2D                    3          Mapped to Edge TPU
+CONCATENATION                  18         Mapped to Edge TPU
+RESHAPE                        6          Mapped to Edge TPU
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+CONV_2D                        64         Mapped to Edge TPU
+MUL                            59         Mapped to Edge TPU
+ADD                            8          Mapped to Edge TPU
+LOGISTIC                       58         Mapped to Edge TPU
+SOFTMAX                        1          Mapped to Edge TPU
+PAD                            7          Mapped to Edge TPU
+TRANSPOSE                      6          Mapped to Edge TPU
+STRIDED_SLICE                  20         Mapped to Edge TPU
+QUANTIZE                       3          Mapped to Edge TPU
+SUB                            2          Mapped to Edge TPU
diff --git a/yolov8n_full_integer_quant_edgetpu_160.tflite b/yolov8n_full_integer_quant_edgetpu_160.tflite
diff --git a/yolov8n_full_integer_quant_edgetpu_192.log b/yolov8n_full_integer_quant_edgetpu_192.log
@@ -0,0 +1,20 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov8n_saved_model/yolov8n_full_integer_quant.tflite
+Output: yolov8n_saved_model/yolov8n_full_integer_quant_edgetpu.tflite
+
+Operator                       Count      Status
+
+SOFTMAX                        1          Mapped to Edge TPU
+ADD                            8          Mapped to Edge TPU
+PAD                            7          Mapped to Edge TPU
+RESHAPE                        6          Mapped to Edge TPU
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+MUL                            59         Mapped to Edge TPU
+CONV_2D                        64         Mapped to Edge TPU
+STRIDED_SLICE                  20         Mapped to Edge TPU
+MAX_POOL_2D                    3          Mapped to Edge TPU
+CONCATENATION                  18         Mapped to Edge TPU
+SUB                            2          Mapped to Edge TPU
+TRANSPOSE                      6          Mapped to Edge TPU
+LOGISTIC                       58         Mapped to Edge TPU
+QUANTIZE                       3          Mapped to Edge TPU
diff --git a/yolov8n_full_integer_quant_edgetpu_192.tflite b/yolov8n_full_integer_quant_edgetpu_192.tflite
diff --git a/yolov8s_full_integer_quant_edgetpu_160.log b/yolov8s_full_integer_quant_edgetpu_160.log
@@ -0,0 +1,20 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov8s_saved_model/yolov8s_full_integer_quant.tflite
+Output: yolov8s_saved_model/yolov8s_full_integer_quant_edgetpu.tflite
+
+Operator                       Count      Status
+
+ADD                            8          Mapped to Edge TPU
+MAX_POOL_2D                    3          Mapped to Edge TPU
+STRIDED_SLICE                  20         Mapped to Edge TPU
+QUANTIZE                       3          Mapped to Edge TPU
+CONV_2D                        64         Mapped to Edge TPU
+LOGISTIC                       58         Mapped to Edge TPU
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+CONCATENATION                  18         Mapped to Edge TPU
+MUL                            59         Mapped to Edge TPU
+SOFTMAX                        1          Mapped to Edge TPU
+RESHAPE                        6          Mapped to Edge TPU
+TRANSPOSE                      6          Mapped to Edge TPU
+PAD                            7          Mapped to Edge TPU
+SUB                            2          Mapped to Edge TPU
diff --git a/yolov8s_full_integer_quant_edgetpu_160.tflite b/yolov8s_full_integer_quant_edgetpu_160.tflite
diff --git a/yolov8s_full_integer_quant_edgetpu_192.log b/yolov8s_full_integer_quant_edgetpu_192.log
@@ -0,0 +1,20 @@
+Edge TPU Compiler version 16.0.384591198
+Input: yolov8s_saved_model/yolov8s_full_integer_quant.tflite
+Output: yolov8s_saved_model/yolov8s_full_integer_quant_edgetpu.tflite
+
+Operator                       Count      Status
+
+RESHAPE                        6          Mapped to Edge TPU
+TRANSPOSE                      6          Mapped to Edge TPU
+MUL                            59         Mapped to Edge TPU
+SOFTMAX                        1          Mapped to Edge TPU
+STRIDED_SLICE                  20         Mapped to Edge TPU
+LOGISTIC                       58         Mapped to Edge TPU
+SUB                            2          Mapped to Edge TPU
+PAD                            7          Mapped to Edge TPU
+ADD                            8          Mapped to Edge TPU
+MAX_POOL_2D                    3          Mapped to Edge TPU
+QUANTIZE                       2          Mapped to Edge TPU
+CONV_2D                        64         Mapped to Edge TPU
+RESIZE_NEAREST_NEIGHBOR        2          Mapped to Edge TPU
+CONCATENATION                  18         Mapped to Edge TPU
diff --git a/yolov8s_full_integer_quant_edgetpu_192.tflite b/yolov8s_full_integer_quant_edgetpu_192.tflite