docs(bench): PP-OCRv5 vs docTR vs EasyOCR CPU — PP-OCRv5 BLOCKED, docTR reste roi

Bench candidat PP-OCRv5 (veille OCR 02/07) : CPU BLOCKED (bug upstream paddlepaddle 3.3.1 PIR/OneDNN, non contournable). docTR CPU = meilleur rapport qualité/latence (0.7s, 10/11, word-level bboxes). PaddleOCR venv = confirmé ORPHAN. Bench GPU = action séparée si on veut ré-évaluer PP-OCRv5.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 18:45:36 +02:00
parent 19187e633e
commit fd9efdbbf5
2 changed files with 433 additions and 0 deletions
--- a/scripts/bench_ppocrv5_cpu.py
+++ b/scripts/bench_ppocrv5_cpu.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python3
+"""PP-OCRv5 CPU baseline bench — single-capture dry run.
+
+Compares docTR vs EasyOCR vs PP-OCRv5 (CPU-only paddlepaddle) on one image.
+
+Mandatory label: CPU baseline, not a GPU verdict.
+
+Metrics recorded by this script:
+  - latency cold/warm (s)
+  - peak traced memory (MB, via tracemalloc)
+  - per-detection text, confidence, bbox and bbox center (px)
+
+NOTE(review): text accuracy (field-level exact match) and word bbox center
+error (px) vs the docTR reference are not computed in this file; presumably
+they are derived afterwards from the saved JSON — confirm.
+"""
+
+import time
+import tracemalloc
+import json
+import sys
+from pathlib import Path
+
+# ── Config ──
+# Primary bench capture; main() uses it whenever the file exists.
+TEST_IMAGE = Path("/home/dom/ai/rpa_vision_v3/data/training/live_sessions/DESKTOP-58D5CAC_windows/sess_20260318T010719_62a058/shots/shot_0172_full.png")
+# Fallback capture, used by main() only when TEST_IMAGE is missing.
+EASILY_IMAGE = Path("/home/dom/ai/rpa_vision_v3/output/playwright/easily_dryrun_2026-05-26/landing_wide.png")
+# Destination of the JSON dump written by main().
+RESULTS_JSON = Path("/home/dom/ai/rpa_vision_v3/scripts/bench_ppocrv5_results.json")
+
+# Engine identifiers, matching the keys under which main() stores results.
+ENGINES = ["ppocrv5_cpu", "doctr", "easyocr"]
+
+
+def _iter_ppocr_lines(page):
+    """Yield ``(polygon, text, confidence)`` for each text line of one page.
+
+    Two result layouts are accepted:
+      - legacy list layout: ``[[polygon, (text, confidence)], ...]``
+      - dict-like layout with parallel ``rec_polys`` / ``rec_texts`` /
+        ``rec_scores`` sequences (NOTE(review): key names taken from the
+        PaddleOCR 3.x documentation — confirm against the installed version).
+    """
+    if page is None:
+        return
+    if hasattr(page, "keys"):
+        polys = page.get("rec_polys")
+        if polys is None:
+            polys = page.get("dt_polys")
+        line_texts = page.get("rec_texts")
+        scores = page.get("rec_scores")
+        # Explicit None checks: these values may be NumPy arrays, whose
+        # truth value is ambiguous.
+        if polys is None or line_texts is None or scores is None:
+            return
+        yield from zip(polys, line_texts, scores)
+        return
+    for line in page:
+        if line is None:
+            continue
+        yield line[0], line[1][0], line[1][1]
+
+
+def bench_ppocrv5_cpu(img_path: Path) -> dict:
+    """Run PP-OCRv5 (PaddleOCR) on one image and return a results dict.
+
+    Args:
+        img_path: Image to recognise; passed to PaddleOCR as a string path.
+
+    Returns:
+        Dict with cold/warm latency (s), traced memory after init and at peak
+        (MB), the detections of the cold run (``texts`` and ``bboxes``) and
+        static environment metadata.
+
+    Raises:
+        Whatever PaddleOCR raises on import, construction or inference.
+        Tracing is always stopped first, so a failure here cannot leak into
+        the measurements of the next engine.
+    """
+    from paddleocr import PaddleOCR
+
+    tracemalloc.start()
+    try:
+        ocr = PaddleOCR(
+            use_textline_orientation=True,
+            lang="fr",
+            return_word_box=True,
+        )
+        # Index 1 of get_traced_memory() is the peak since tracing started.
+        # NOTE(review): tracemalloc only sees allocations made through
+        # Python's allocators; native inference buffers are presumably not
+        # counted — treat these figures as a lower bound.
+        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+
+        # Cold run
+        t0 = time.perf_counter()
+        result_cold = ocr.ocr(str(img_path))
+        t_cold = time.perf_counter() - t0
+
+        # Warm run: only the timing matters, the output is discarded.
+        t0 = time.perf_counter()
+        ocr.ocr(str(img_path))
+        t_warm = time.perf_counter() - t0
+
+        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+    finally:
+        tracemalloc.stop()
+
+    # Parse the first page of the cold run.
+    texts = []
+    bboxes = []
+    page = result_cold[0] if result_cold else None
+    for poly, text, confidence in _iter_ppocr_lines(page):
+        # Native floats so json.dump() does not stringify NumPy values.
+        points = [[float(pt[0]), float(pt[1])] for pt in poly]
+        if not points:
+            continue
+        cx = sum(x for x, _ in points) / len(points)
+        cy = sum(y for _, y in points) / len(points)
+        texts.append({"text": text, "confidence": float(confidence)})
+        bboxes.append({
+            "bbox": points,
+            "center": (cx, cy),
+            # Same key and rounding as the docTR / EasyOCR results.
+            "center_px": (round(cx, 1), round(cy, 1)),
+            "text": text,
+        })
+
+    return {
+        "engine": "ppocrv5_cpu",
+        "image": str(img_path),
+        "cold_latency_s": round(t_cold, 3),
+        "warm_latency_s": round(t_warm, 3),
+        "mem_init_MB": round(mem_init, 1),
+        "mem_peak_MB": round(mem_peak, 1),
+        "num_detections": len(texts),
+        "texts": texts,
+        "bboxes": bboxes,
+        # Static metadata describing the bench environment (not probed).
+        "paddle_version": "3.4.0",
+        "paddlepaddle_version": "3.3.1",
+        "device": "cpu",
+        "cuda_available_driver": True,
+        "cuda_compiled_paddle": False,
+        "label": "baseline CPU, non verdict GPU",
+    }
+
+
+def bench_doctr(img_path: Path) -> dict:
+    """Run docTR on one image and return a results dict.
+
+    Args:
+        img_path: Image to recognise.
+
+    Returns:
+        Dict with cold/warm latency (s), traced memory after init and at peak
+        (MB), the word-level detections of the first inference (``texts`` and
+        ``bboxes``) and static environment metadata.
+
+    Raises:
+        Whatever docTR or PIL raise. Tracing is always stopped first, so a
+        failure here cannot leak into the measurements of the next engine.
+    """
+    from doctr.models import ocr_predictor
+
+    import PIL.Image
+
+    # The image size is loop-invariant: read it once instead of re-opening
+    # the file for every recognised word.
+    with PIL.Image.open(img_path) as im:
+        w, h = im.size
+
+    tracemalloc.start()
+    try:
+        predictor = ocr_predictor(pretrained=True)
+        # Index 1 of get_traced_memory() is the peak since tracing started.
+        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+
+        from doctr.io import DocumentFile
+        # The image is decoded here, outside the timed sections below.
+        doc = DocumentFile.from_images(str(img_path))
+
+        t0 = time.perf_counter()
+        result = predictor(doc)
+        t_cold = time.perf_counter() - t0
+
+        # Warm run: only the timing matters, the output is discarded.
+        t0 = time.perf_counter()
+        predictor(doc)
+        t_warm = time.perf_counter() - t0
+
+        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+    finally:
+        tracemalloc.stop()
+
+    texts = []
+    bboxes = []
+    for page in result.pages:
+        for block in page.blocks:
+            for line in block.lines:
+                for word in line.words:
+                    texts.append({"text": word.value, "confidence": float(word.confidence)})
+                    # docTR geometry is in relative coords (0-1). Averaging
+                    # every point gives the center for a 2-corner box and
+                    # (NOTE(review): presumably) for a 4-point polygon too.
+                    points = [(float(pt[0]), float(pt[1])) for pt in word.geometry]
+                    if not points:
+                        continue
+                    # Convert relative to pixel
+                    cx = sum(x for x, _ in points) / len(points) * w
+                    cy = sum(y for _, y in points) / len(points) * h
+                    bboxes.append({
+                        "bbox_relative": points,
+                        "center_px": (round(cx, 1), round(cy, 1)),
+                        "text": word.value,
+                    })
+
+    return {
+        "engine": "doctr",
+        "image": str(img_path),
+        "cold_latency_s": round(t_cold, 3),
+        "warm_latency_s": round(t_warm, 3),
+        "mem_init_MB": round(mem_init, 1),
+        "mem_peak_MB": round(mem_peak, 1),
+        "num_detections": len(texts),
+        "texts": texts,
+        "bboxes": bboxes,
+        # Static metadata describing the bench environment (not probed).
+        "version": "1.0.1",
+        "device": "cpu",
+        "label": "baseline CPU",
+    }
+
+
+def bench_easyocr(img_path: Path) -> dict:
+    """Run EasyOCR (``gpu=False``) on one image and return a results dict.
+
+    Args:
+        img_path: Image to recognise; passed to EasyOCR as a string path.
+
+    Returns:
+        Dict with cold/warm latency (s), traced memory after init and at peak
+        (MB), the detections of the first run (``texts`` and ``bboxes``) and
+        static environment metadata.
+
+    Raises:
+        Whatever EasyOCR raises. Tracing is always stopped first, so a
+        failure here cannot leak into the measurements of the next engine.
+    """
+    import easyocr
+
+    tracemalloc.start()
+    try:
+        reader = easyocr.Reader(["fr"], gpu=False)
+        # Index 1 of get_traced_memory() is the peak since tracing started.
+        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+
+        t0 = time.perf_counter()
+        result = reader.readtext(str(img_path))
+        t_cold = time.perf_counter() - t0
+
+        # Warm run: only the timing matters, the output is discarded.
+        t0 = time.perf_counter()
+        reader.readtext(str(img_path))
+        t_warm = time.perf_counter() - t0
+
+        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
+    finally:
+        tracemalloc.stop()
+
+    texts = []
+    bboxes = []
+    for detection in result:
+        # detection = (list of [x, y] points, text, confidence)
+        bbox_raw, text, confidence = detection[0], detection[1], detection[2]
+        # Coordinates may be NumPy scalars, which json.dump(default=str)
+        # would silently turn into strings; store native floats instead.
+        points = [[float(pt[0]), float(pt[1])] for pt in bbox_raw]
+        if not points:
+            continue
+        cx = sum(x for x, _ in points) / len(points)
+        cy = sum(y for _, y in points) / len(points)
+        texts.append({"text": text, "confidence": float(confidence)})
+        bboxes.append({"bbox": points, "center_px": (round(cx, 1), round(cy, 1)), "text": text})
+
+    return {
+        "engine": "easyocr",
+        "image": str(img_path),
+        "cold_latency_s": round(t_cold, 3),
+        "warm_latency_s": round(t_warm, 3),
+        "mem_init_MB": round(mem_init, 1),
+        "mem_peak_MB": round(mem_peak, 1),
+        "num_detections": len(texts),
+        "texts": texts,
+        "bboxes": bboxes,
+        # Static metadata describing the bench environment (not probed).
+        "version": "1.7.2",
+        "device": "cpu",
+        "label": "baseline CPU",
+    }
+
+
+def main():
+    """Run every engine on the bench image, save the JSON, print a summary.
+
+    The image is TEST_IMAGE when it exists, otherwise EASILY_IMAGE; the
+    process exits with status 1 when neither exists. An engine that raises is
+    recorded as ``{"error": ..., "error_type": ...}`` and does not stop the
+    remaining engines.
+    """
+    # Check image exists
+    img = TEST_IMAGE if TEST_IMAGE.exists() else EASILY_IMAGE
+    if not img.exists():
+        print(f"ERROR: No test image found. Tried {TEST_IMAGE} and {EASILY_IMAGE}")
+        sys.exit(1)
+
+    print(f"Bench image: {img}")
+    print("Image size: ...")
+    import PIL.Image
+    with PIL.Image.open(img) as im:
+        w, h = im.size
+        print(f"  {w}x{h}, mode={im.mode}")
+
+    # Engine key -> (section title, bench function).
+    runners = {
+        "ppocrv5_cpu": ("PP-OCRv5 CPU", bench_ppocrv5_cpu),
+        "doctr": ("docTR CPU", bench_doctr),
+        "easyocr": ("EasyOCR CPU", bench_easyocr),
+    }
+
+    all_results = {}
+    for eng in ENGINES:
+        title, runner = runners[eng]
+        print(f"\n=== {title} ===")
+        try:
+            r = runner(img)
+        except Exception as e:
+            # Top-level boundary: one broken engine (including a missing
+            # package) must not prevent the others from being benchmarked.
+            print(f"  FAILED: {e}")
+            all_results[eng] = {"error": str(e), "error_type": type(e).__name__}
+            continue
+        all_results[eng] = r
+        print(f"  Cold: {r['cold_latency_s']}s | Warm: {r['warm_latency_s']}s | Detections: {r['num_detections']}")
+        print(f"  Memory: init {r['mem_init_MB']}MB | peak {r['mem_peak_MB']}MB")
+
+    # Save JSON (default=str is a safety net for non-JSON-native values).
+    with open(RESULTS_JSON, "w", encoding="utf-8") as f:
+        json.dump(all_results, f, indent=2, default=str)
+    print(f"\nResults saved to {RESULTS_JSON}")
+
+    # ── Synthesis table ──
+    print("\n=== Synthesis ===")
+    print(f"{'Engine':<15} {'Cold(s)':<10} {'Warm(s)':<10} {'Det':<6} {'Mem(MB)':<10} {'Label'}")
+    for eng, r in all_results.items():
+        if "error" in r:
+            print(f"{eng:<15} FAILED")
+            continue
+        print(f"{eng:<15} {r['cold_latency_s']:<10} {r['warm_latency_s']:<10} {r['num_detections']:<6} {r['mem_peak_MB']:<10} {r.get('label', '')}")
+
+
+if __name__ == "__main__":
+    main()