docs(bench): PP-OCRv5 vs docTR vs EasyOCR CPU — PP-OCRv5 BLOCKED, docTR reste roi
Bench candidat PP-OCRv5 (veille OCR 02/07) : CPU BLOCKED (bug upstream paddlepaddle 3.3.1 PIR/OneDNN, non contournable). docTR CPU = meilleur rapport qualité/latence (0.7s, 10/11, word-level bboxes). PaddleOCR venv = confirmé ORPHAN. Bench GPU = action séparée si on veut ré-évaluer PP-OCRv5. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
263
scripts/bench_ppocrv5_cpu.py
Normal file
263
scripts/bench_ppocrv5_cpu.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PP-OCRv5 CPU baseline bench — dry-run 1 capture.
|
||||
|
||||
Compare docTR vs EasyOCR vs PP-OCRv5 (CPU-only paddlepaddle).
|
||||
|
||||
Label obligatoire : baseline CPU, non verdict GPU.
|
||||
|
||||
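Metrics recorded in the results JSON:
- latency cold/warm (s)
- peak memory (MB) as traced by tracemalloc (Python-level allocations only)
- recognized texts with confidence, and bbox centers (px)

Not computed by this script (to be evaluated from the JSON output):
- text accuracy (field-level exact match)
- word bbox center error (px) vs docTR reference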
|
||||
"""
|
||||
|
||||
import time
|
||||
import tracemalloc
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── Config ──
# Primary benchmark capture; main() falls back to EASILY_IMAGE when this
# file does not exist.
TEST_IMAGE = Path("/home/dom/ai/rpa_vision_v3/data/training/live_sessions/DESKTOP-58D5CAC_windows/sess_20260318T010719_62a058/shots/shot_0172_full.png")
# Fallback capture, used only when TEST_IMAGE is missing.
EASILY_IMAGE = Path("/home/dom/ai/rpa_vision_v3/output/playwright/easily_dryrun_2026-05-26/landing_wide.png")
# Where main() writes the per-engine results.
RESULTS_JSON = Path("/home/dom/ai/rpa_vision_v3/scripts/bench_ppocrv5_results.json")

# Engine identifiers; they match the keys main() stores in the results dict.
# NOTE(review): not referenced anywhere else in the visible source.
ENGINES = ["ppocrv5_cpu", "doctr", "easyocr"]
|
||||
|
||||
|
||||
def bench_ppocrv5_cpu(img_path: Path) -> dict:
    """Benchmark PP-OCRv5 (PaddleOCR, CPU) on a single image.

    Builds the engine, times two consecutive passes over the image (cold,
    then warm) and parses the detections of the cold pass.

    Args:
        img_path: Image to run OCR on.

    Returns:
        Dict holding latencies (s), tracemalloc peaks (MB), the recognized
        texts, their boxes/centers and static environment metadata.

    Raises:
        Any exception raised by PaddleOCR during import, construction or
        inference. Memory tracing is always stopped before it propagates.
    """
    from paddleocr import PaddleOCR

    tracemalloc.start()
    try:
        ocr = PaddleOCR(
            use_textline_orientation=True,
            lang="fr",
            return_word_box=True,
        )
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        # Cold run
        t0 = time.perf_counter()
        result_cold = ocr.ocr(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run: only timed, its output is not needed.
        t0 = time.perf_counter()
        ocr.ocr(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Stop tracing even on failure; otherwise the next engine's
        # tracemalloc peaks would include this engine's allocations.
        tracemalloc.stop()

    # Only the first page is parsed (single-image bench).
    page = result_cold[0] if result_cold else None
    if page is not None and hasattr(page, "keys") and "rec_texts" in page:
        # PaddleOCR 3.x: dict-like page result with parallel sequences.
        # NOTE(review): key names taken from the PaddleOCR 3.x docs —
        # confirm against the installed version.
        lines = [
            ([[float(x), float(y)] for x, y in poly], text, float(score))
            for poly, text, score in zip(
                page["rec_polys"], page["rec_texts"], page["rec_scores"]
            )
        ]
    elif page:
        # Legacy 2.x shape: [[[x1,y1],[x2,y2],[x3,y3],[x4,y4]], (text, score)]
        lines = [
            (line[0], line[1][0], line[1][1])
            for line in page
            if line is not None
        ]
    else:
        lines = []

    texts = []
    bboxes = []
    for bbox_raw, text, confidence in lines:
        # Center = mean of the polygon corners.
        cx = sum(pt[0] for pt in bbox_raw) / len(bbox_raw)
        cy = sum(pt[1] for pt in bbox_raw) / len(bbox_raw)
        texts.append({"text": text, "confidence": confidence})
        bboxes.append({
            "bbox": bbox_raw,
            "center": (cx, cy),
            # Same key/rounding as the docTR and EasyOCR benches.
            "center_px": (round(cx, 1), round(cy, 1)),
            "text": text,
        })

    return {
        "engine": "ppocrv5_cpu",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        # Environment metadata below is hard-coded, not probed at runtime.
        "paddle_version": "3.4.0",
        "paddlepaddle_version": "3.3.1",
        "device": "cpu",
        "cuda_available_driver": True,
        "cuda_compiled_paddle": False,
        "label": "baseline CPU, non verdict GPU",
    }
|
||||
|
||||
|
||||
def bench_doctr(img_path: Path) -> dict:
    """Benchmark docTR (CPU) on a single image.

    Builds the pretrained predictor, times two consecutive passes (cold,
    then warm) and flattens the cold-pass result into word-level entries.

    Args:
        img_path: Image to run OCR on.

    Returns:
        Dict holding latencies (s), tracemalloc peaks (MB), the recognized
        words with confidence, and per-word relative boxes plus pixel
        centers.

    Raises:
        Any exception raised by docTR or PIL. Memory tracing is always
        stopped before it propagates.
    """
    from doctr.models import ocr_predictor

    tracemalloc.start()
    try:
        predictor = ocr_predictor(pretrained=True)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        from doctr.io import DocumentFile
        doc = DocumentFile.from_images(str(img_path))

        # Cold run
        t0 = time.perf_counter()
        result = predictor(doc)
        t_cold = time.perf_counter() - t0

        # Warm run: only timed, its output is not needed.
        t0 = time.perf_counter()
        predictor(doc)
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Stop tracing even on failure so later benches start clean.
        tracemalloc.stop()

    # Image size is needed to turn relative geometry into pixels; read it
    # once instead of reopening the file for every word.
    import PIL.Image
    with PIL.Image.open(img_path) as im:
        w, h = im.size

    all_words = (
        word
        for page in result.pages
        for block in page.blocks
        for line in block.lines
        for word in line.words
    )

    texts = []
    bboxes = []
    for word in all_words:
        texts.append({"text": word.value, "confidence": word.confidence})
        # docTR bbox in relative coords (0-1): two corner points.
        (x0, y0), (x1, y1) = word.geometry[0], word.geometry[1]
        cx = (x0 + x1) / 2 * w
        cy = (y0 + y1) / 2 * h
        bboxes.append({
            "bbox_relative": [(x0, y0), (x1, y1)],
            "center_px": (round(cx, 1), round(cy, 1)),
            "text": word.value,
        })

    return {
        "engine": "doctr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        # Hard-coded metadata, not probed at runtime.
        "version": "1.0.1",
        "device": "cpu",
        "label": "baseline CPU",
    }
|
||||
|
||||
|
||||
def bench_easyocr(img_path: Path) -> dict:
    """Benchmark EasyOCR (CPU) on a single image.

    Builds a French reader with the GPU disabled, times two consecutive
    passes (cold, then warm) and parses the detections of the cold pass.

    Args:
        img_path: Image to run OCR on.

    Returns:
        Dict holding latencies (s), tracemalloc peaks (MB), the recognized
        texts with confidence, and per-detection boxes plus pixel centers.

    Raises:
        Any exception raised by EasyOCR. Memory tracing is always stopped
        before it propagates.
    """
    import easyocr

    tracemalloc.start()
    try:
        reader = easyocr.Reader(["fr"], gpu=False)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        # Cold run
        t0 = time.perf_counter()
        result = reader.readtext(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run: only timed, its output is not needed.
        t0 = time.perf_counter()
        reader.readtext(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Stop tracing even on failure so later benches start clean.
        tracemalloc.stop()

    texts = []
    bboxes = []
    for detection in result:
        # Each detection is indexed as (points, text, confidence).
        bbox_raw, text, confidence = detection[0], detection[1], detection[2]
        # Center = mean of the polygon corners.
        cx = sum(pt[0] for pt in bbox_raw) / len(bbox_raw)
        cy = sum(pt[1] for pt in bbox_raw) / len(bbox_raw)
        texts.append({"text": text, "confidence": confidence})
        bboxes.append({
            "bbox": bbox_raw,
            "center_px": (round(cx, 1), round(cy, 1)),
            "text": text,
        })

    return {
        "engine": "easyocr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        # Hard-coded metadata, not probed at runtime.
        "version": "1.7.2",
        "device": "cpu",
        "label": "baseline CPU",
    }
|
||||
|
||||
|
||||
def main():
    """Run every engine bench on one image, save the JSON and print a summary.

    Uses TEST_IMAGE, falling back to EASILY_IMAGE when it does not exist,
    and exits with status 1 if neither is present. Each engine runs
    independently: a failing engine is recorded as ``{"error": ...}`` and
    the remaining engines still run.
    """
    # Check image exists
    img = TEST_IMAGE if TEST_IMAGE.exists() else EASILY_IMAGE
    if not img.exists():
        print(f"ERROR: No test image found. Tried {TEST_IMAGE} and {EASILY_IMAGE}")
        sys.exit(1)

    print(f"Bench image: {img}")
    print("Image size: ...")
    import PIL.Image
    with PIL.Image.open(img) as im:
        w, h = im.size
        print(f" {w}x{h}, mode={im.mode}")

    # (results key, section title, bench function), in execution order.
    engine_runs = (
        ("ppocrv5_cpu", "PP-OCRv5 CPU", bench_ppocrv5_cpu),
        ("doctr", "docTR CPU", bench_doctr),
        ("easyocr", "EasyOCR CPU", bench_easyocr),
    )

    all_results = {}
    for key, title, bench in engine_runs:
        print(f"\n=== {title} ===")
        try:
            r = bench(img)
        except Exception as e:
            # Deliberately broad: one broken engine must not abort the
            # whole comparison; the failure is stored in the results.
            print(f" FAILED: {e}")
            all_results[key] = {"error": str(e)}
        else:
            all_results[key] = r
            print(f" Cold: {r['cold_latency_s']}s | Warm: {r['warm_latency_s']}s | Detections: {r['num_detections']}")
            print(f" Memory: init {r['mem_init_MB']}MB | peak {r['mem_peak_MB']}MB")

    # Save JSON; default=str stringifies values json cannot encode natively.
    with open(RESULTS_JSON, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, default=str)
    print(f"\nResults saved to {RESULTS_JSON}")

    # ── Synthesis table ──
    print("\n=== Synthesis ===")
    print(f"{'Engine':<15} {'Cold(s)':<10} {'Warm(s)':<10} {'Det':<6} {'Mem(MB)':<10} {'Label'}")
    for eng, r in all_results.items():
        if "error" in r:
            print(f"{eng:<15} FAILED")
            continue
        print(f"{eng:<15} {r['cold_latency_s']:<10} {r['warm_latency_s']:<10} {r['num_detections']:<6} {r['mem_peak_MB']:<10} {r.get('label', '')}")
|
||||
|
||||
|
||||
# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user