# anonymisation/tests/unit/test_ocr_onnxtr.py

"""Migration OCR docTR → OnnxTR : le moteur OCR est OnnxTR et lit le texte rendu.

Pas de mock : on exerce le vrai predictor OCR du moteur sur une image réelle.
"""
import numpy as np
import pytest
from PIL import Image, ImageDraw, ImageFont

import anonymizer_core_refactored_onnx as core


def test_ocr_engine_is_onnxtr():
    """Check that the OCR model returned by ``core`` is defined in an OnnxTR module.

    The test first requires ``core._OCR_AVAILABLE`` to be truthy, then inspects
    the module name of the class of the object returned by
    ``core._get_ocr_model()``.
    """
    assert core._OCR_AVAILABLE, "moteur OCR indisponible"
    # After the migration the OCR engine must be OnnxTR (ONNX Runtime, no torch),
    # so the predictor's class is expected to live in an "onnxtr" module.
    engine_module = type(core._get_ocr_model()).__module__
    assert "onnxtr" in engine_module.lower(), engine_module


@pytest.mark.slow
def test_ocr_reads_rendered_text():
    """Render four words on a white image and check the OCR reads at least two.

    The words are drawn in black on a 1400x300 RGB canvas, the image is passed
    to the model returned by ``core._get_ocr_model()`` as a one-element batch,
    and every recognised word value of the first page is joined and
    upper-cased before being searched for the expected words.
    """
    font_size = 64
    canvas = Image.new("RGB", (1400, 300), "white")
    draw = ImageDraw.Draw(canvas)

    # Prefer a DejaVu TrueType face; which ones are installed depends on the host.
    for font_name in ("DejaVuSans-Bold.ttf", "DejaVuSans.ttf"):
        try:
            font = ImageFont.truetype(font_name, font_size)
        except OSError:
            continue
        break
    else:
        # No DejaVu font found. The plain default font is very small, which
        # would make the OCR assertion depend on the host's fonts, so request
        # the same size when Pillow supports it (``size`` was added in 10.1).
        try:
            font = ImageFont.load_default(size=font_size)
        except TypeError:
            # Older Pillow: ``load_default`` takes no ``size`` argument.
            font = ImageFont.load_default()

    words = ["BORDEAUX", "DUPONT", "MARTIN", "BAYONNE"]
    draw.text((40, 110), " ".join(words), fill="black", font=font)

    model = core._get_ocr_model()
    result = model([np.array(canvas)])
    # Flatten blocks -> lines -> words of the first (only) page into one string.
    got = " ".join(
        word.value
        for block in result.pages[0].blocks
        for line in block.lines
        for word in line.words
    ).upper()
    found = sum(1 for expected in words if expected in got)
    # Tolerate partial recognition: two of the four words are enough.
    assert found >= 2, f"OCR a lu: {got!r}"