# rpa_vision_v3/scripts/bench_ppocrv5_cpu.py

#!/usr/bin/env python3
"""PP-OCRv5 CPU baseline bench — dry-run 1 capture.

Compare docTR vs EasyOCR vs PP-OCRv5 (CPU-only paddlepaddle).

Mandatory label: 'baseline CPU, non verdict GPU' (these figures are a CPU
baseline, not a verdict on GPU performance).

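Metrics (per engine):
  - text accuracy (field-level exact match): only the raw recognized texts and
    confidences are saved; the score itself is not computed by this script
  - word bbox center error (px) vs docTR reference: only the box centers are
    saved; the error itself is not computed by this script
  - latency cold/warm (s)
  - peak memory (MB), as traced by tracemalloc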
"""

import time
import tracemalloc
import json
import sys
from pathlib import Path

# ── Config ──
# Primary benchmark capture; main() uses it whenever the file exists.
TEST_IMAGE = Path("/home/dom/ai/rpa_vision_v3/data/training/live_sessions/DESKTOP-58D5CAC_windows/sess_20260318T010719_62a058/shots/shot_0172_full.png")
# Fallback capture, used by main() only when TEST_IMAGE is missing.
EASILY_IMAGE = Path("/home/dom/ai/rpa_vision_v3/output/playwright/easily_dryrun_2026-05-26/landing_wide.png")
# Destination of the JSON dump written by main() after all engines have run.
RESULTS_JSON = Path("/home/dom/ai/rpa_vision_v3/scripts/bench_ppocrv5_results.json")

# Engine identifiers (same strings as the result keys used in main()).
# NOTE(review): not referenced anywhere else in this file.
ENGINES = ["ppocrv5_cpu", "doctr", "easyocr"]


def _paddle_page_lines(page):
    """Yield ``(polygon, text, confidence)`` for each text line of one page.

    Two result layouts are understood:

    * dict-like pages exposing ``rec_polys`` / ``rec_texts`` / ``rec_scores``
      (the layout documented for PaddleOCR 3.x ``predict`` results);
    * legacy list pages whose entries look like
      ``[polygon, (text, confidence, ...)]`` and may contain ``None``.

    NOTE(review): the dict branch assumes 3.x result objects subclass
    ``dict`` — confirm against the installed PaddleOCR/PaddleX version.
    """
    if isinstance(page, dict):
        columns = []
        for key in ("rec_polys", "rec_texts", "rec_scores"):
            column = page.get(key)
            # Explicit None test: a column may be a numpy array, whose truth
            # value is ambiguous.
            columns.append(() if column is None else column)
        yield from zip(*columns)
        return

    for line in page:
        if line is None:
            continue
        yield line[0], line[1][0], line[1][1]


def bench_ppocrv5_cpu(img_path: Path) -> dict:
    """Benchmark PP-OCRv5 (PaddleOCR) on one image and return a result dict.

    The engine is built once, then run twice on ``img_path``: the first call
    is reported as the cold latency, the second as the warm latency. Only the
    detections of the first (cold) run are parsed.

    Args:
        img_path: Image file to run OCR on.

    Returns:
        A dict with latencies (seconds), traced memory (MB), the detection
        count, ``texts`` (text + confidence) and ``bboxes`` (polygon, raw
        ``center`` and rounded ``center_px``, text), plus static
        version/device metadata.

    Notes:
        * Memory figures come from ``tracemalloc``, which only traces memory
          allocated by Python; native allocations of the inference backend
          are presumably not included — treat the numbers as a lower bound.
        * Tracing is active during the timed calls and may add overhead.
        * No ``device`` argument is passed to ``PaddleOCR``; the reported
          ``"device": "cpu"`` relies on the installed paddlepaddle build being
          CPU-only. The version strings below are hard-coded, not introspected.
    """
    from paddleocr import PaddleOCR

    tracemalloc.start()
    try:
        ocr = PaddleOCR(
            use_textline_orientation=True,
            lang="fr",
            return_word_box=True,
        )
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        # Cold run
        t0 = time.perf_counter()
        result_cold = ocr.ocr(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run (timing only; its output is not used)
        t0 = time.perf_counter()
        ocr.ocr(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing: main() keeps going after a failure, and a
        # still-running trace would carry this engine's peak into the next one.
        tracemalloc.stop()

    # Parse the first page of the cold run.
    texts = []
    bboxes = []
    if result_cold and result_cold[0]:
        for poly, text, confidence in _paddle_page_lines(result_cold[0]):
            # Plain floats keep the polygon JSON-serialisable as numbers even
            # when the engine hands back numpy values.
            polygon = [[float(pt[0]), float(pt[1])] for pt in poly]
            cx = sum(pt[0] for pt in polygon) / len(polygon)
            cy = sum(pt[1] for pt in polygon) / len(polygon)
            texts.append({"text": text, "confidence": float(confidence)})
            bboxes.append({
                "bbox": polygon,
                "center": (cx, cy),
                # Same key/rounding as the docTR and EasyOCR results.
                "center_px": (round(cx, 1), round(cy, 1)),
                "text": text,
            })

    return {
        "engine": "ppocrv5_cpu",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "paddle_version": "3.4.0",
        "paddlepaddle_version": "3.3.1",
        "device": "cpu",
        "cuda_available_driver": True,
        "cuda_compiled_paddle": False,
        "label": "baseline CPU, non verdict GPU",
    }


def bench_doctr(img_path: Path) -> dict:
    """Benchmark docTR on one image and return a result dict.

    The predictor is built once and called twice on the same loaded document:
    the first call is reported as the cold latency, the second as the warm
    latency. Only the words of the first (cold) run are parsed.

    Args:
        img_path: Image file to run OCR on.

    Returns:
        A dict with latencies (seconds), traced memory (MB), the detection
        count, ``texts`` (word + confidence) and ``bboxes`` (relative box,
        pixel center, word), plus static version/device metadata.

    Notes:
        * The image is loaded with ``DocumentFile.from_images`` before the
          timers start, so the latencies here exclude image loading.
        * Memory figures come from ``tracemalloc``, which only traces memory
          allocated by Python; native allocations of the inference backend
          are presumably not included — treat the numbers as a lower bound.
        * The version string below is hard-coded, not introspected.
    """
    import PIL.Image
    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor

    tracemalloc.start()
    try:
        predictor = ocr_predictor(pretrained=True)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        doc = DocumentFile.from_images(str(img_path))

        # Cold run
        t0 = time.perf_counter()
        result = predictor(doc)
        t_cold = time.perf_counter() - t0

        # Warm run (timing only; its output is not used)
        t0 = time.perf_counter()
        predictor(doc)
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing: main() keeps going after a failure, and a
        # still-running trace would carry this engine's peak into the next one.
        tracemalloc.stop()

    # The image size does not depend on the word, so read it once instead of
    # re-opening the file for every detected word.
    with PIL.Image.open(img_path) as im:
        w, h = im.size

    texts = []
    bboxes = []
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    texts.append({"text": word.value, "confidence": word.confidence})
                    # docTR geometry is in relative coords (0-1); the first
                    # two points are used as opposite corners of the box.
                    bbox = word.geometry
                    # Convert the relative box center to pixels.
                    cx = (bbox[0][0] + bbox[1][0]) / 2 * w
                    cy = (bbox[0][1] + bbox[1][1]) / 2 * h
                    bboxes.append({
                        "bbox_relative": [(bbox[0][0], bbox[0][1]), (bbox[1][0], bbox[1][1])],
                        "center_px": (round(cx, 1), round(cy, 1)),
                        "text": word.value,
                    })

    return {
        "engine": "doctr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "version": "1.0.1",
        "device": "cpu",
        "label": "baseline CPU",
    }


def bench_easyocr(img_path: Path) -> dict:
    """Benchmark EasyOCR (``gpu=False``) on one image and return a result dict.

    The reader is built once and ``readtext`` is called twice on ``img_path``:
    the first call is reported as the cold latency, the second as the warm
    latency. Only the detections of the first (cold) run are parsed.

    Args:
        img_path: Image file to run OCR on.

    Returns:
        A dict with latencies (seconds), traced memory (MB), the detection
        count, ``texts`` (text + confidence) and ``bboxes`` (polygon, pixel
        center, text), plus static version/device metadata.

    Notes:
        * Memory figures come from ``tracemalloc``, which only traces memory
          allocated by Python; native allocations of the inference backend
          are presumably not included — treat the numbers as a lower bound.
        * The version string below is hard-coded, not introspected.
    """
    import easyocr

    tracemalloc.start()
    try:
        reader = easyocr.Reader(["fr"], gpu=False)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        # Cold run
        t0 = time.perf_counter()
        result = reader.readtext(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run (timing only; its output is not used)
        t0 = time.perf_counter()
        reader.readtext(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing: main() keeps going after a failure, and a
        # still-running trace would carry this engine's peak into the next one.
        tracemalloc.stop()

    texts = []
    bboxes = []
    for detection in result:
        bbox_raw = detection[0]  # list of [x, y] points
        text = detection[1]
        confidence = detection[2]
        # Plain floats: the engine may return numpy scalars, which
        # json.dump(..., default=str) would otherwise write as strings.
        polygon = [[float(pt[0]), float(pt[1])] for pt in bbox_raw]
        cx = sum(pt[0] for pt in polygon) / len(polygon)
        cy = sum(pt[1] for pt in polygon) / len(polygon)
        texts.append({"text": text, "confidence": float(confidence)})
        bboxes.append({"bbox": polygon, "center_px": (round(cx, 1), round(cy, 1)), "text": text})

    return {
        "engine": "easyocr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "version": "1.7.2",
        "device": "cpu",
        "label": "baseline CPU",
    }


def main():
    """Run every engine on one capture, dump the results, print a summary.

    Image selection: TEST_IMAGE when it exists, otherwise EASILY_IMAGE; the
    process exits with status 1 when neither file is present.

    Each engine runs inside its own try/except so that one failing engine
    does not stop the others; a failure is stored as ``{"error": <message>}``
    under that engine's key. All results are then written to RESULTS_JSON and
    a one-line-per-engine table is printed.
    """
    image = TEST_IMAGE
    if not image.exists():
        image = EASILY_IMAGE
    if not image.exists():
        print(f"ERROR: No test image found. Tried {TEST_IMAGE} and {EASILY_IMAGE}")
        sys.exit(1)

    print(f"Bench image: {image}")
    print("Image size: ...")
    import PIL.Image
    with PIL.Image.open(image) as picture:
        width, height = picture.size
        print(f"  {width}x{height}, mode={picture.mode}")

    # (result key, banner title, benchmark function), in execution order.
    runs = (
        ("ppocrv5_cpu", "PP-OCRv5 CPU", bench_ppocrv5_cpu),
        ("doctr", "docTR CPU", bench_doctr),
        ("easyocr", "EasyOCR CPU", bench_easyocr),
    )

    all_results = {}
    for key, title, runner in runs:
        print(f"\n=== {title} ===")
        try:
            outcome = runner(image)
            all_results[key] = outcome
            print(
                "  Cold: {}s | Warm: {}s | Detections: {}".format(
                    outcome["cold_latency_s"],
                    outcome["warm_latency_s"],
                    outcome["num_detections"],
                )
            )
            print(
                "  Memory: init {}MB | peak {}MB".format(
                    outcome["mem_init_MB"], outcome["mem_peak_MB"]
                )
            )
        except Exception as exc:
            # Top-level boundary: report the failure and move on to the next engine.
            print(f"  FAILED: {exc}")
            all_results[key] = {"error": str(exc)}

    # Persist everything; default=str stringifies values json cannot encode.
    with RESULTS_JSON.open("w") as handle:
        json.dump(all_results, handle, indent=2, default=str)
    print(f"\nResults saved to {RESULTS_JSON}")

    # Summary table: one shared column layout for the header and every row.
    row_fmt = "{:<15} {:<10} {:<10} {:<6} {:<10} {}"
    print("\n=== Synthesis ===")
    print(row_fmt.format("Engine", "Cold(s)", "Warm(s)", "Det", "Mem(MB)", "Label"))
    for engine, outcome in all_results.items():
        if "error" in outcome:
            print("{:<15} FAILED".format(engine))
            continue
        print(
            row_fmt.format(
                engine,
                outcome["cold_latency_s"],
                outcome["warm_latency_s"],
                outcome["num_detections"],
                outcome["mem_peak_MB"],
                outcome.get("label", ""),
            )
        )


if __name__ == "__main__":
    main()