diff --git a/benchmark/config/text_detection_ppdetect.yaml b/benchmark/config/text_detection_ppdetect.yaml
new file mode 100644
index 00000000..3cb59503
--- /dev/null
+++ b/benchmark/config/text_detection_ppdetect.yaml
@@ -0,0 +1,18 @@
+Benchmark:
+  name: "Text Detection Benchmark"
+  type: "Detection"
+  data:
+    path: "benchmark/data/text"
+    files: ["1.jpg", "2.jpg", "3.jpg"]
+    sizes: # [[w1, h1], ...], Omit to run at original scale
+      - [640, 480]
+  metric:
+    warmup: 30
+    repeat: 10
+    reduction: "median"
+  backend: "default"
+  target: "cpu"
+
+Model:
+  name: "PPDetect"
+  modelPath: "models/text_detection_ppdetect/text_detection_ppdetect_2022_June.onnx"
diff --git a/models/__init__.py b/models/__init__.py
index 1d073fb5..3808c977 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -11,6 +11,7 @@
 from .image_classification_mobilenet.mobilenet_v2 import MobileNetV2
 from .palm_detection_mediapipe.mp_palmdet import MPPalmDet
 from .license_plate_detection_yunet.lpd_yunet import LPD_YuNet
+from .text_detection_ppdetect.ppdetect import PPDetect
 
 class Registery:
     def __init__(self, name):
@@ -37,3 +38,4 @@ def register(self, item):
 MODELS.register(MobileNetV2)
 MODELS.register(MPPalmDet)
 MODELS.register(LPD_YuNet)
+MODELS.register(PPDetect)
diff --git a/models/text_detection_ppdetect/LICENSE b/models/text_detection_ppdetect/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/models/text_detection_ppdetect/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/models/text_detection_ppdetect/README.md b/models/text_detection_ppdetect/README.md
new file mode 100644
index 00000000..df437216
--- /dev/null
+++ b/models/text_detection_ppdetect/README.md
@@ -0,0 +1,31 @@
+# PP-OCRv2 Text Detection
+
+Real-time Scene Text Detection with Differentiable Binarization
+
+This model is ported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR).
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+### Example outputs
+
+![cola](./examples/cola.jpg)
+
+![book](./examples/book.jpg)
+
+## License
+
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+
+## Reference
+
+- https://arxiv.org/abs/1911.08947
+- https://github.com/PaddlePaddle/PaddleOCR
+- https://github.com/BADBADBADBOY/DBnet-lite.pytorch
+- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
\ No newline at end of file
diff --git a/models/text_detection_ppdetect/demo.py b/models/text_detection_ppdetect/demo.py
new file mode 100644
index 00000000..73c1ba28
--- /dev/null
+++ b/models/text_detection_ppdetect/demo.py
@@ -0,0 +1,123 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from ppdetect import PPDetect
+
+def str2bool(v):
+    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+        return True
+    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+        return False
+    else:
+        raise NotImplementedError
+
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
+parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_ppdetect_2022_June.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--width', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
+parser.add_argument('--height', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
+parser.add_argument('--binary_threshold', type=float, default=0.3, help='Threshold of the binary map.')
+parser.add_argument('--box_threshold', type=float, default=0.5, help='Score threshold for filtering detected boxes.')
+parser.add_argument('--is_poly', type=str2bool, default=False, help='Set true to output polygons instead of quadrangles.')
+parser.add_argument('--max_candidates', type=int, default=200, help='Maximum number of candidate text regions.')
+parser.add_argument('--unclip_ratio', type=float, default=2.0, help='The unclip ratio of the detected text region, which determines the output size.')
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()

+def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
+    output = image.copy()
+
+    if fps is not None:
+        cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
+
+    # results[0] may hold a different number of points per region (polygon mode),
+    # so convert each region separately instead of stacking them into one array
+    pts = [np.array(region, dtype=np.int32) for region in results[0]]
+    output = cv.polylines(output, pts, isClosed, box_color, thickness)
+
+    return output
+
+if __name__ == '__main__':
+    # Instantiate PPDetect
+    model = PPDetect(modelPath=args.model,
+                     binaryThreshold=args.binary_threshold,
+                     boxThresh=args.box_threshold,
+                     isPoly=args.is_poly,
+                     maxCandidates=args.max_candidates,
+                     unclipRatio=args.unclip_ratio,
+                     backendId=args.backend,
+                     targetId=args.target)
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        image = cv.resize(image, (args.width, args.height))
+
+        # Inference
+        results = model.infer(image)
+
+        # Print results
+        print('{} texts detected.'.format(len(results[0])))
+        for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
+            print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
+
+        # Draw results on the input image
+        image = visualize(image, results)
+
+        # Save results if save is true
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else: # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+
+        tm = cv.TickMeter()
+        while cv.waitKey(1) < 0:
+            hasFrame, frame = cap.read()
+            if not hasFrame:
+                print('No frames grabbed!')
+                break
+
+            frame = cv.resize(frame, (args.width, args.height))
+            # Inference
+            tm.start()
+            results = model.infer(frame) # results is a tuple (boxes, scores)
+            tm.stop()
+
+            # Draw results on the input frame
+            frame = visualize(frame, results, fps=tm.getFPS())
+
+            # Visualize results in a new window
+            cv.imshow('{} Demo'.format(model.name), frame)
+
+            tm.reset()
+
diff --git a/models/text_detection_ppdetect/examples/book.jpg b/models/text_detection_ppdetect/examples/book.jpg
new file mode 100644
index 00000000..6f6a543c
Binary files /dev/null and b/models/text_detection_ppdetect/examples/book.jpg differ
diff --git a/models/text_detection_ppdetect/examples/cola.jpg b/models/text_detection_ppdetect/examples/cola.jpg
new file mode 100644
index 00000000..fda4ebd1
Binary files /dev/null and b/models/text_detection_ppdetect/examples/cola.jpg differ
diff --git a/models/text_detection_ppdetect/ppdetect.py b/models/text_detection_ppdetect/ppdetect.py
new file mode 100644
index 00000000..cb4ed060
--- /dev/null
+++ b/models/text_detection_ppdetect/ppdetect.py
@@ -0,0 +1,246 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+import numpy as np
+import cv2 as cv
+from shapely.geometry import Polygon
+import pyclipper
+
+class PPDetect:
+    def __init__(self, modelPath, boxThresh=0.6, binaryThreshold=0.3, isPoly=True, maxCandidates=200, minSize=3, unclipRatio=2.0, backendId=0, targetId=0):
+        self.model_path = modelPath
+        self.backend_id = backendId
+        self.target_id = targetId
+        self.input_names = ''
+        self.output_names = ''
+
+        self.model = cv.dnn.readNet(self.model_path)
+        self.model.setPreferableBackend(self.backend_id)
+        self.model.setPreferableTarget(self.target_id)
+        self.mean = [0.485, 0.456, 0.406]
+        self.std = [0.229, 0.224, 0.225]
+        self.binaryThreshold = binaryThreshold
+        self.boxThresh = boxThresh
+        self.maxCandidates = maxCandidates
+        self.isPoly = isPoly
+        self.unclipRatio = unclipRatio
+        self.minSize = minSize
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def setBackend(self, backend):
+        self.backend_id = backend
+        self.model.setPreferableBackend(self.backend_id)
+
+    def setTarget(self, target):
+        self.target_id = target
+        self.model.setPreferableTarget(self.target_id)
+
+    def _preprocess(self, image):
+        # Normalize with ImageNet mean/std and convert HWC -> NCHW
+        input_blob = (image / 255.0 - self.mean) / self.std
+        input_blob = input_blob.transpose(2, 0, 1)
+        input_blob = input_blob[np.newaxis, :, :, :]
+        input_blob = input_blob.astype(np.float32)
+        return input_blob
+
+    def infer(self, image):
+        # Preprocess
+        input_blob = self._preprocess(image)
+        self.model.setInput(input_blob, self.input_names)
+        # Forward
+        output_blob = self.model.forward(self.output_names)
+        # Postprocess
+        results = self._postprocess(output_blob)
+
+        return results
+
+    def polygonsFromBitmap(self, pred, _bitmap, dest_width, dest_height):
+        '''
+        _bitmap: single map with shape (H, W),
+            whose values are binarized as {0, 1}
+        '''
+
+        bitmap = _bitmap
+        height, width = bitmap.shape
+        boxes = []
+        scores = []
+
+        contours, _ = cv.findContours(
+            (bitmap * 255).astype(np.uint8),
+            cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
+
+        for contour in contours[:self.maxCandidates]:
+            epsilon = 0.001 * cv.arcLength(contour, True)
+            approx = cv.approxPolyDP(contour, epsilon, True)
+            points = approx.reshape((-1, 2))
+            if points.shape[0] < 4:
+                continue
+            score = self.boxScoreFast(pred, points.reshape(-1, 2))
+            if self.boxThresh > score:
+                continue
+
+            box = self.unClip(points, self.unclipRatio)
+            if len(box) > 1:
+                # Unclipping split the region into several polygons; skip it
+                continue
+            box = box.reshape(-1, 2)
+            _, sside = self.getMiniBoxes(box.reshape((-1, 1, 2)))
+            if sside < self.minSize + 2:
+                continue
+
+            if not isinstance(dest_width, int):
+                dest_width = dest_width.item()
+                dest_height = dest_height.item()
+
+            # Scale from bitmap coordinates back to the destination image size
+            box[:, 0] = np.clip(
+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
+            box[:, 1] = np.clip(
+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
+            boxes.append(box.tolist())
+            scores.append(score)
+        return boxes, scores
+
+    def boxesFromBitmap(self, pred, _bitmap, destWidth, destHeight):
+        '''
+        _bitmap: single map with shape (H, W),
+            whose values are binarized as {0, 1}
+        '''
+
+        bitmap = _bitmap
+        height, width = bitmap.shape
+
+        outs = cv.findContours((bitmap * 255).astype(np.uint8), cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
+        if len(outs) == 3:
+            # OpenCV 3.x returns (image, contours, hierarchy)
+            contours = outs[1]
+        else:
+            # OpenCV 4.x returns (contours, hierarchy)
+            contours = outs[0]
+
+        num_contours = min(len(contours), self.maxCandidates)
+        boxes = np.zeros((num_contours, 4, 2), dtype=np.int32)
+        scores = np.zeros((num_contours,), dtype=np.float32)
+
+        for index in range(num_contours):
+            contour = contours[index]
+            points, sside = self.getMiniBoxes(contour)
+            if sside < self.minSize:
+                continue
+            points = np.array(points)
+            score = self.boxScoreFast(pred, points.reshape(-1, 2))
+            if self.boxThresh > score:
+                continue
+
+            box = self.unClip(points, self.unclipRatio).reshape(-1, 1, 2)
+            box, sside = self.getMiniBoxes(box)
+            if sside < self.minSize + 2:
+                continue
+            box = np.array(box)
+            if not isinstance(destWidth, int):
+                destWidth = destWidth.item()
+                destHeight = destHeight.item()
+
+            # Scale from bitmap coordinates back to the destination image size
+            box[:, 0] = np.clip(
+                np.round(box[:, 0] / width * destWidth), 0, destWidth)
+            box[:, 1] = np.clip(
+                np.round(box[:, 1] / height * destHeight), 0, destHeight)
+            boxes[index, :, :] = box.astype(np.int32)
+            scores[index] = score
+        return boxes, scores
+
+    def unClip(self, box, unclip_ratio=2.0):
+        # Expand the region by a distance proportional to area / perimeter
+        poly = Polygon(box)
+        distance = poly.area * unclip_ratio / poly.length
+        offset = pyclipper.PyclipperOffset()
+        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        expanded = np.array(offset.Execute(distance))
+        return expanded
+
+    def getMiniBoxes(self, contour):
+        # Return the 4 corners of the minimum-area rectangle, ordered from the
+        # top-left clockwise, together with the length of its shorter side
+        bounding_box = cv.minAreaRect(contour)
+        points = sorted(list(cv.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index1, index2, index3, index4 = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index1 = 0
+            index4 = 1
+        else:
+            index1 = 1
+            index4 = 0
+        if points[3][1] > points[2][1]:
+            index2 = 2
+            index3 = 3
+        else:
+            index2 = 3
+            index3 = 2
+
+        box = [
+            points[index1], points[index2], points[index3], points[index4]
+        ]
+        return box, min(bounding_box[1])
+
+    def boxScoreFast(self, bitmap, _box):
+        # Mean of the probability map inside the box polygon
+        h, w = bitmap.shape[:2]
+        box = _box.copy()
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+        box[:, 0] = box[:, 0] - xmin
+        box[:, 1] = box[:, 1] - ymin
+        cv.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+    def _postprocess(self, pred):
+        pred = pred[:, 0, :, :]
+        segmentation = pred > self.binaryThreshold
+
+        boxes_batch = []
+        score_batch = []
+        for batch_index in range(pred.shape[0]):
+            height, width = pred.shape[-2:]
+            if self.isPoly:
+                tmp_boxes, tmp_scores = self.polygonsFromBitmap(
+                    pred[batch_index], segmentation[batch_index], width, height)
+
+                boxes = []
+                score = []
+                for k in range(len(tmp_boxes)):
+                    if tmp_scores[k] > self.boxThresh:
+                        boxes.append(tmp_boxes[k])
+                        score.append(tmp_scores[k])
+                for i in range(len(boxes)):
+                    boxes[i] = np.array(boxes[i])
+
+                boxes_batch.append(boxes)
+                score_batch.append(score)
+            else:
+                tmp_boxes, tmp_scores = self.boxesFromBitmap(
+                    pred[batch_index], segmentation[batch_index], width, height)
+
+                boxes = []
+                score = []
+                for k in range(len(tmp_boxes)):
+                    if tmp_scores[k] > self.boxThresh:
+                        boxes.append(tmp_boxes[k])
+                        score.append(tmp_scores[k])
+                if len(boxes) > 0:
+                    boxes = np.array(boxes)
+
+                boxes_batch.append(boxes)
+                score_batch.append(score)
+
+        return tuple(boxes_batch[0]), np.array(score_batch[0])
+
diff --git a/models/text_detection_ppdetect/text_detection_ppdetect_2022_June.onnx b/models/text_detection_ppdetect/text_detection_ppdetect_2022_June.onnx
new file mode 100644
index 00000000..f0365966
--- /dev/null
+++ b/models/text_detection_ppdetect/text_detection_ppdetect_2022_June.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71c5898f6e9bad26c6bb38e20ff17a11611ecade9d70d6d4b2c0d576f7874993
+size 2338643
diff --git a/models/text_detection_ppdetect/text_detection_ppdetect_2022_June_fp16.onnx b/models/text_detection_ppdetect/text_detection_ppdetect_2022_June_fp16.onnx
new file mode 100644
index 00000000..4ba5d8a8
--- /dev/null
+++ b/models/text_detection_ppdetect/text_detection_ppdetect_2022_June_fp16.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fcce6642b787c6b99fc772b61b4c9e06422f6dce92b4ed8aba5bd8dc6c817e9
+size 1481704
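Note for reviewers: below is a minimal usage sketch of the new `PPDetect` wrapper outside of `demo.py` (not part of the diff). It assumes the working directory is `models/text_detection_ppdetect/` with `numpy`, `opencv-python`, `shapely`, and `pyclipper` installed, and that the Git-LFS model file has been pulled; the thresholds mirror the defaults used in `demo.py`, and `sample.jpg` is a placeholder path.

```python
import cv2 as cv

from ppdetect import PPDetect

# Thresholds mirror the demo defaults in this diff
model = PPDetect(modelPath='text_detection_ppdetect_2022_June.onnx',
                 binaryThreshold=0.3,  # threshold on the probability map
                 boxThresh=0.5,        # minimum mean score inside a region
                 isPoly=False)         # False -> quadrangles, True -> polygons

image = cv.imread('sample.jpg')       # placeholder path, replace with a real image
image = cv.resize(image, (736, 736))  # both sides must be multiples of 32

boxes, scores = model.infer(image)    # with isPoly=False: one (4, 2) quad per region
for box, score in zip(boxes, scores):
    print(box.tolist(), float(score))
```

Once this lands, the benchmark config should be runnable the same way as the existing ones, presumably `python benchmark.py --cfg ./config/text_detection_ppdetect.yaml` from the `benchmark/` directory.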