# rpa_vision_v3/tests/unit/test_text_match_fuzzy_prefix.py

"""Tests C-P1 : tolérance préfixe dans _text_match_fuzzy.

Cas réel : OCR partiel `observed='Enregi'` sur cible `expected='Enregistrer'`
provoquait un rejet pre-check `expected='Enregistrer' observed='Enregi'`.
Patch : accepter si observed est préfixe d'expected avec len ≥ 4 et ≥ 50%
de la longueur expected.

Référence : inbox_claude/2026-05-25_1938_codex-to-claude_TACHES-projet-ocr-d5v3c-lea.md
Fix : agent_v0/server_v1/resolve_engine.py:_text_match_fuzzy
"""
from __future__ import annotations

import sys
from pathlib import Path

import pytest


# Directory two levels above the folder containing this file
# (presumably the project root — confirm against the repository layout).
ROOT = Path(__file__).resolve().parents[2]
# Put ROOT at the front of sys.path, once, so the lazy
# `agent_v0.server_v1.resolve_engine` imports inside the tests can resolve
# (assumes the `agent_v0` package is importable from ROOT — TODO confirm).
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))


# ────────────────────────────────────────────────────────────────────────────
# Cas qui MOTIVENT le patch (rejet incorrect avant)
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_enregi_matches_enregistrer():
    """Partial OCR read 'Enregi' must match the target 'Enregistrer'.

    This is the real-world case (reported by Codex) that motivated the
    prefix-tolerance patch.
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Enregistrer", "Enregi"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is True


@pytest.mark.unit
def test_coller_matches_collier():
    """A 4-character prefix of a 6-character target (about 66%) must match.

    Note: despite the test name, the inputs are 'Coller' (expected) and
    'Coll' (observed).
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected = "Coller"
    observed = "Coll"
    assert _text_match_fuzzy(expected, observed) is True


@pytest.mark.unit
def test_cancel_matches_canc():
    """'Canc' (4 of 6 characters, about 66%) must match 'Cancel'."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    matched = _text_match_fuzzy("Cancel", "Canc")
    assert matched is True


# ────────────────────────────────────────────────────────────────────────────
# Garde-fous : préfixes trop courts/faibles DOIVENT être rejetés
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_save_does_not_match_sa_too_short():
    """A prefix shorter than 4 characters is rejected (high false-positive risk)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Save", "Sa"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is False


@pytest.mark.unit
def test_bouton_does_not_match_bo_too_short():
    """A 2-character prefix is rejected (likely false positive)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected = "Bouton"
    observed = "Bo"
    assert _text_match_fuzzy(expected, observed) is False


@pytest.mark.unit
def test_enregistrer_sous_does_not_match_enregi_below_50pct():
    """A 6-character prefix of a 16-character target (~37%, below 50%) is rejected.

    Guards against 'Enregi' (a partial read of 'Enregistrer') being accepted
    for 'Enregistrer sous' when it should target plain 'Enregistrer'.
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # NOTE(review): it is unverified whether _normalize_for_match strips
    # spaces. If it does, the target becomes "enregistrersous" (15 chars) and
    # the ratio is 6/15 = 40%, which is still below the 50% threshold.
    expected, observed = "Enregistrer sous", "Enregi"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is False


@pytest.mark.unit
def test_save_matches_save_substring_unchanged():
    """Pre-existing substring case: 'Save' inside 'Saved' must still match."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    matched = _text_match_fuzzy("Save", "Saved")
    assert matched is True


# ────────────────────────────────────────────────────────────────────────────
# Comportements existants préservés (regression guards)
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_token_matching_still_works():
    """Pre-existing multi-token case must keep matching.

    Per the original author's note: 'coller saisir dossier patient' versus
    'u saisir le dossier patient' gives 3/4 tokens, which is >= 60%.
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected = "Coller ou saisir le dossier patient"
    observed = "u saisir le dossier patient"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is True


@pytest.mark.unit
def test_unrelated_text_still_rejected():
    """Completely different text is still rejected."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Enregistrer", "Annuler"
    assert _text_match_fuzzy(expected, observed) is False


@pytest.mark.unit
def test_empty_expected_returns_true():
    """An empty expected text means no constraint, so anything matches."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    no_constraint = ""
    outcome = _text_match_fuzzy(no_constraint, "anything")
    assert outcome is True


@pytest.mark.unit
def test_empty_observed_does_not_match_non_empty_expected():
    """An empty observed text is rejected when the expected text is non-empty."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    nothing_observed = ""
    outcome = _text_match_fuzzy("Enregistrer", nothing_observed)
    assert outcome is False


# ────────────────────────────────────────────────────────────────────────────
# Edge cases préfixe
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_prefix_exactly_4_chars_at_50_pct():
    """Boundary case: 4 of 8 characters is exactly 50% with length >= 4, so it is accepted."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Continue" has 8 characters; "Cont" has 4, i.e. exactly 50%.
    expected, observed = "Continue", "Cont"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is True


@pytest.mark.unit
def test_prefix_3_chars_rejected_even_if_high_ratio():
    """A 3-character prefix is rejected even at >= 50% (4-character minimum guard)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Sav" is 3 of the 4 characters of "Save" (75%), but it is shorter than
    # the 4-character minimum, so the prefix rule must not accept it.
    expected = "Save"
    observed = "Sav"
    assert _text_match_fuzzy(expected, observed) is False


@pytest.mark.unit
def test_prefix_not_strict_prefix_rejected():
    """When observed is NOT a strict prefix of expected, the prefix rule does not apply."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Sauver" is neither a substring nor a prefix of "Enregistrer".
    # NOTE(review): per the original author's note, the token fallback then
    # scores 0/1 (< 0.60), giving False — confirm against the implementation.
    expected, observed = "Enregistrer", "Sauver"
    outcome = _text_match_fuzzy(expected, observed)
    assert outcome is False