#!/usr/bin/env python3 """PP-OCRv5 CPU baseline bench — dry-run 1 capture. Compare docTR vs EasyOCR vs PP-OCRv5 (CPU-only paddlepaddle). Label obligatoire : baseline CPU, non verdict GPU. Metrics: - text accuracy (field-level exact match) - word bbox center error (px) vs docTR reference - latency cold/warm (s) - peak memory (MB) """ import time import tracemalloc import json import sys from pathlib import Path # ── Config ── TEST_IMAGE = Path("/home/dom/ai/rpa_vision_v3/data/training/live_sessions/DESKTOP-58D5CAC_windows/sess_20260318T010719_62a058/shots/shot_0172_full.png") EASILY_IMAGE = Path("/home/dom/ai/rpa_vision_v3/output/playwright/easily_dryrun_2026-05-26/landing_wide.png") RESULTS_JSON = Path("/home/dom/ai/rpa_vision_v3/scripts/bench_ppocrv5_results.json") ENGINES = ["ppocrv5_cpu", "doctr", "easyocr"] def bench_ppocrv5_cpu(img_path: Path) -> dict: """Run PP-OCRv5 CPU on image, return results dict.""" from paddleocr import PaddleOCR tracemalloc.start() ocr = PaddleOCR( use_textline_orientation=True, lang="fr", return_word_box=True, ) mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024 # Cold run t0 = time.perf_counter() result_cold = ocr.ocr(str(img_path)) t_cold = time.perf_counter() - t0 # Warm run t0 = time.perf_counter() result_warm = ocr.ocr(str(img_path)) t_warm = time.perf_counter() - t0 mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024 tracemalloc.stop() # Parse results — PaddleOCR v3.4 returns list of pages texts = [] bboxes = [] if result_cold and result_cold[0]: for line in result_cold[0]: if line is None: continue bbox_raw = line[0] # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] text = line[1][0] # recognized text confidence = line[1][1] # Compute center xs = [pt[0] for pt in bbox_raw] ys = [pt[1] for pt in bbox_raw] cx = sum(xs) / len(xs) cy = sum(ys) / len(ys) texts.append({"text": text, "confidence": confidence}) bboxes.append({"bbox": bbox_raw, "center": (cx, cy), "text": text}) return { "engine": "ppocrv5_cpu", "image": str(img_path), "cold_latency_s": round(t_cold, 3), "warm_latency_s": round(t_warm, 3), "mem_init_MB": round(mem_init, 1), "mem_peak_MB": round(mem_peak, 1), "num_detections": len(texts), "texts": texts, "bboxes": bboxes, "paddle_version": "3.4.0", "paddlepaddle_version": "3.3.1", "device": "cpu", "cuda_available_driver": True, "cuda_compiled_paddle": False, "label": "baseline CPU, non verdict GPU", } def bench_doctr(img_path: Path) -> dict: """Run docTR CPU on image.""" from doctr.models import ocr_predictor tracemalloc.start() predictor = ocr_predictor(pretrained=True) mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024 from doctr.io import DocumentFile doc = DocumentFile.from_images(str(img_path)) t0 = time.perf_counter() result = predictor(doc) t_cold = time.perf_counter() - t0 t0 = time.perf_counter() result2 = predictor(doc) t_warm = time.perf_counter() - t0 mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024 tracemalloc.stop() texts = [] bboxes = [] for page in result.pages: for block in page.blocks: for line in block.lines: for word in line.words: texts.append({"text": word.value, "confidence": word.confidence}) # docTR bbox in relative coords (0-1) bbox = word.geometry # Convert relative to pixel import PIL.Image with PIL.Image.open(img_path) as im: w, h = im.size cx = (bbox[0][0] + bbox[1][0]) / 2 * w cy = (bbox[0][1] + bbox[1][1]) / 2 * h bboxes.append({ "bbox_relative": [(bbox[0][0], bbox[0][1]), (bbox[1][0], bbox[1][1])], "center_px": (round(cx, 1), round(cy, 1)), "text": word.value, }) return { "engine": "doctr", "image": str(img_path), "cold_latency_s": round(t_cold, 3), "warm_latency_s": round(t_warm, 3), "mem_init_MB": round(mem_init, 1), "mem_peak_MB": round(mem_peak, 1), "num_detections": len(texts), "texts": texts, "bboxes": bboxes, "version": "1.0.1", "device": "cpu", "label": "baseline CPU", } def bench_easyocr(img_path: Path) -> dict: """Run EasyOCR CPU on image.""" import easyocr tracemalloc.start() reader = easyocr.Reader(["fr"], gpu=False) mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024 t0 = time.perf_counter() result = reader.readtext(str(img_path)) t_cold = time.perf_counter() - t0 t0 = time.perf_counter() result2 = reader.readtext(str(img_path)) t_warm = time.perf_counter() - t0 mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024 tracemalloc.stop() texts = [] bboxes = [] for detection in result: bbox_raw = detection[0] # list of [x,y] points text = detection[1] confidence = detection[2] xs = [pt[0] for pt in bbox_raw] ys = [pt[1] for pt in bbox_raw] cx = sum(xs) / len(xs) cy = sum(ys) / len(ys) texts.append({"text": text, "confidence": confidence}) bboxes.append({"bbox": bbox_raw, "center_px": (round(cx, 1), round(cy, 1)), "text": text}) return { "engine": "easyocr", "image": str(img_path), "cold_latency_s": round(t_cold, 3), "warm_latency_s": round(t_warm, 3), "mem_init_MB": round(mem_init, 1), "mem_peak_MB": round(mem_peak, 1), "num_detections": len(texts), "texts": texts, "bboxes": bboxes, "version": "1.7.2", "device": "cpu", "label": "baseline CPU", } def main(): # Check image exists img = TEST_IMAGE if TEST_IMAGE.exists() else EASILY_IMAGE if not img.exists(): print(f"ERROR: No test image found. Tried {TEST_IMAGE} and {EASILY_IMAGE}") sys.exit(1) print(f"Bench image: {img}") print(f"Image size: ...") import PIL.Image with PIL.Image.open(img) as im: w, h = im.size print(f" {w}x{h}, mode={im.mode}") all_results = {} # ── PP-OCRv5 CPU ── print("\n=== PP-OCRv5 CPU ===") try: r = bench_ppocrv5_cpu(img) all_results["ppocrv5_cpu"] = r print(f" Cold: {r['cold_latency_s']}s | Warm: {r['warm_latency_s']}s | Detections: {r['num_detections']}") print(f" Memory: init {r['mem_init_MB']}MB | peak {r['mem_peak_MB']}MB") except Exception as e: print(f" FAILED: {e}") all_results["ppocrv5_cpu"] = {"error": str(e)} # ── docTR ── print("\n=== docTR CPU ===") try: r = bench_doctr(img) all_results["doctr"] = r print(f" Cold: {r['cold_latency_s']}s | Warm: {r['warm_latency_s']}s | Detections: {r['num_detections']}") print(f" Memory: init {r['mem_init_MB']}MB | peak {r['mem_peak_MB']}MB") except Exception as e: print(f" FAILED: {e}") all_results["doctr"] = {"error": str(e)} # ── EasyOCR ── print("\n=== EasyOCR CPU ===") try: r = bench_easyocr(img) all_results["easyocr"] = r print(f" Cold: {r['cold_latency_s']}s | Warm: {r['warm_latency_s']}s | Detections: {r['num_detections']}") print(f" Memory: init {r['mem_init_MB']}MB | peak {r['mem_peak_MB']}MB") except Exception as e: print(f" FAILED: {e}") all_results["easyocr"] = {"error": str(e)} # Save JSON with open(RESULTS_JSON, "w") as f: json.dump(all_results, f, indent=2, default=str) print(f"\nResults saved to {RESULTS_JSON}") # ── Synthesis table ── print("\n=== Synthesis ===") print(f"{'Engine':<15} {'Cold(s)':<10} {'Warm(s)':<10} {'Det':<6} {'Mem(MB)':<10} {'Label'}") for eng, r in all_results.items(): if "error" in r: print(f"{eng:<15} FAILED") continue print(f"{eng:<15} {r['cold_latency_s']:<10} {r['warm_latency_s']:<10} {r['num_detections']:<6} {r['mem_peak_MB']:<10} {r.get('label', '')}") if __name__ == "__main__": main()