Files
rpa_vision_v3/tests/unit/test_extract_dossier.py
Dom 13f760a3b9 feat(extraction): handler extract_dossier + pont worker→DB VWB mutualisé (brique 3)
vwb_db.py : couplage worker→DB VWB lazy (app Flask sur instance/workflows.db)
mutualisé (R1 + extraction), + persist_extracted_dossier (grille → Job/Table/Field).
replay_engine.py : handler _handle_extract_dossier_action — lit le screenshot,
extrait une grille structurée, gate qualité conservatrice (complete|needs_review),
persiste avec preuve (screenshot_ref/bbox/confidence). N'échoue JAMAIS le replay.
Données patient EN CLAIR (canal extraction, non anonymisé).

Réserve : dispatch runtime (api_stream.py) non encore branché — étape suivante,
à coordonner. Brique 3/4 de la verticale extraction dossier patient.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 14:18:08 +02:00

220 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests TDD — Extraction « dossier patient » (brique 3).
Deux couches testées :
1. ``vwb_db.persist_extracted_dossier`` : depuis une grille OCR
(List[List[cell]]), crée ExtractionJob → ExtractedTable → ExtractedField
et commit. Testé sur SQLite mémoire via un app-context Flask jetable
(PAS la vraie DB VWB — isolation).
2. ``replay_engine._handle_extract_dossier_action`` : lit last_screenshot,
appelle ``extract_grid_from_image`` (mocké), applique la gate qualité
(complete / needs_review), persiste via vwb_db et n'échoue JAMAIS le
replay (grille vide → needs_review, sans lever).
⚠️ Canal extraction = données patient EN CLAIR (volontaire) : on vérifie
que les valeurs sont persistées telles quelles, sans tokenisation.
"""
import pytest
from flask import Flask
# vwb_db ajoute visual_workflow_builder/backend au sys.path à l'import →
# doit précéder l'import de db.models (couplage worker→DB VWB mutualisé).
import agent_v0.server_v1.vwb_db as vwb_db
import agent_v0.server_v1.replay_engine as replay_engine
from db.models import db, ExtractionJob, ExtractedTable, ExtractedField
# ---------------------------------------------------------------------------
# Fixtures : app Flask jetable sur SQLite mémoire (isolation totale)
# ---------------------------------------------------------------------------
@pytest.fixture
def mem_app():
    """Yield a throwaway Flask app bound to an in-memory SQLite database.

    The schema is created inside an application context that stays pushed
    for the duration of the test, so tests may use ``db.session`` directly.
    """
    flask_app = Flask("test_extract_dossier")
    flask_app.config.update(
        SQLALCHEMY_DATABASE_URI="sqlite:///:memory:",
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )
    db.init_app(flask_app)
    with flask_app.app_context():
        db.create_all()
        yield flask_app
def _grid_2x2():
"""Grille connue 2×2 (confiances hautes)."""
return [
[
{"text": "Nom", "bbox": [[0, 0], [1, 0], [1, 1], [0, 1]], "confidence": 0.95, "row": 0, "col": 0},
{"text": "MOREL", "bbox": [[2, 0], [3, 0], [3, 1], [2, 1]], "confidence": 0.92, "row": 0, "col": 1},
],
[
{"text": "IPP", "bbox": [[0, 2], [1, 2], [1, 3], [0, 3]], "confidence": 0.90, "row": 1, "col": 0},
{"text": "25123456", "bbox": [[2, 2], [3, 2], [3, 3], [2, 3]], "confidence": 0.88, "row": 1, "col": 1},
],
]
# ---------------------------------------------------------------------------
# 1) persist_extracted_dossier
# ---------------------------------------------------------------------------
@pytest.mark.unit
def test_persist_extracted_dossier_creates_job_table_fields(mem_app):
    """Persisting a 2×2 grid yields one job, one table and four fields."""
    bbox = {"x": 0, "y": 0, "width": 800, "height": 600}
    job_id = vwb_db.persist_extracted_dossier(
        _grid_2x2(),
        patient_ref="MOREL Catherine",
        source_session_id="sess-42",
        screenshot_ref="/captures/last.png",
        screen_bbox=dict(bbox),
        status="complete",
    )
    assert isinstance(job_id, str)
    assert job_id

    # Job row: metadata stored as given.
    job = db.session.get(ExtractionJob, job_id)
    assert job is not None
    assert job.status == "complete"
    assert job.patient_ref == "MOREL Catherine"  # clear text, not tokenised
    assert job.source_session_id == "sess-42"

    # Exactly one table, carrying the screenshot reference and screen bbox.
    job_tables = ExtractedTable.query.filter_by(job_id=job_id).all()
    assert len(job_tables) == 1
    table = job_tables[0]
    assert table.screenshot_ref == "/captures/last.png"
    assert table.screen_bbox == bbox

    # One field per cell of the 2×2 grid.
    table_fields = ExtractedField.query.filter_by(table_id=table.id).all()
    assert len(table_fields) == 4
    by_position = {(field.row, field.col): field for field in table_fields}
    assert by_position[(0, 1)].value == "MOREL"  # patient value kept in clear text
    assert by_position[(1, 1)].value == "25123456"
    assert by_position[(0, 0)].confidence == pytest.approx(0.95)
@pytest.mark.unit
def test_persist_extracted_dossier_empty_grid_still_creates_job(mem_app):
    """Empty grid → a job and a table without fields (status stored as given)."""
    persist_kwargs = {
        "patient_ref": None,
        "source_session_id": "sess-empty",
        "screenshot_ref": "/captures/empty.png",
        "screen_bbox": None,
        "status": "needs_review",
    }
    job_id = vwb_db.persist_extracted_dossier([], **persist_kwargs)

    job = db.session.get(ExtractionJob, job_id)
    assert job is not None
    assert job.status == "needs_review"

    job_tables = ExtractedTable.query.filter_by(job_id=job_id).all()
    assert len(job_tables) == 1
    field_count = ExtractedField.query.filter_by(table_id=job_tables[0].id).count()
    assert field_count == 0
# ---------------------------------------------------------------------------
# 2) _handle_extract_dossier_action
# ---------------------------------------------------------------------------
@pytest.mark.unit
def test_handle_extract_dossier_complete(mem_app, monkeypatch, tmp_path):
    """A high-confidence 2×2 grid is persisted with status ``complete``."""
    # Dummy screenshot on disk (the mocked OCR ignores its content).
    screenshot = tmp_path / "shot.png"
    screenshot.write_bytes(b"\x89PNG")

    def _fake_extract(*args, **kwargs):
        # Mocked extract_grid_from_image → high-confidence 2×2 grid.
        return _grid_2x2()

    monkeypatch.setattr("core.llm.extract_grid_from_image", _fake_extract)
    # vwb_app_context pointed at the fixture's in-memory app.
    monkeypatch.setattr(vwb_db, "vwb_app_context", mem_app.app_context)
    monkeypatch.setattr(replay_engine, "vwb_db", vwb_db, raising=False)

    state = {
        "last_screenshot": str(screenshot),
        "variables": {},
        "replay_id": "rep-1",
    }
    params = {
        "output_var": "dossier_id",
        "patient_ref": "MOREL Catherine",
        "expected_cols": 2,
        "min_confidence": 0.5,
    }
    action = {"type": "extract_dossier", "parameters": params}

    result = replay_engine._handle_extract_dossier_action(action, state, "sess-42")
    assert result is True

    # The job id is published under the requested output variable.
    job_id = state["variables"]["dossier_id"]
    assert isinstance(job_id, str)
    assert job_id

    with mem_app.app_context():
        job = db.session.get(ExtractionJob, job_id)
        assert job is not None
        # Gate passes: grid not empty, confidence OK, 2 columns.
        assert job.status == "complete"
@pytest.mark.unit
def test_handle_extract_dossier_low_confidence_needs_review(mem_app, monkeypatch, tmp_path):
    """A cell below ``min_confidence`` makes the job ``needs_review``."""
    screenshot = tmp_path / "shot.png"
    screenshot.write_bytes(b"\x89PNG")

    # Single cell whose confidence (0.10) is under the 0.5 threshold.
    low_grid = [
        [{"text": "x", "bbox": [], "confidence": 0.10, "row": 0, "col": 0}],
    ]

    def _fake_extract(*args, **kwargs):
        return low_grid

    monkeypatch.setattr("core.llm.extract_grid_from_image", _fake_extract)
    monkeypatch.setattr(vwb_db, "vwb_app_context", mem_app.app_context)

    state = {
        "last_screenshot": str(screenshot),
        "variables": {},
        "replay_id": "rep-2",
    }
    action = {"type": "extract_dossier", "parameters": {"min_confidence": 0.5}}

    result = replay_engine._handle_extract_dossier_action(action, state, "sess-low")
    # Gate switched to needs_review.
    # NOTE(review): the module docstring says the handler never fails the
    # replay — confirm that a False return is the intended signal here.
    assert result is False

    # No output_var given: the id is read from the "extracted_dossier" variable.
    job_id = state["variables"]["extracted_dossier"]
    with mem_app.app_context():
        job = db.session.get(ExtractionJob, job_id)
        assert job.status == "needs_review"
@pytest.mark.unit
def test_handle_extract_dossier_empty_grid_no_raise(mem_app, monkeypatch, tmp_path):
    """An empty grid does not raise and is persisted as ``needs_review``."""
    screenshot = tmp_path / "shot.png"
    screenshot.write_bytes(b"\x89PNG")

    def _fake_extract(*args, **kwargs):
        return []

    monkeypatch.setattr("core.llm.extract_grid_from_image", _fake_extract)
    monkeypatch.setattr(vwb_db, "vwb_app_context", mem_app.app_context)

    state = {
        "last_screenshot": str(screenshot),
        "variables": {},
        "replay_id": "rep-3",
    }
    action = {"type": "extract_dossier", "parameters": {}}

    # Never raises; empty grid → needs_review.
    result = replay_engine._handle_extract_dossier_action(action, state, "sess-empty")
    assert result is False

    job_id = state["variables"]["extracted_dossier"]
    with mem_app.app_context():
        job = db.session.get(ExtractionJob, job_id)
        assert job.status == "needs_review"
@pytest.mark.unit
def test_handle_extract_dossier_persist_failure_no_raise(mem_app, monkeypatch, tmp_path):
    """If persistence raises, the handler must not propagate the exception."""
    screenshot = tmp_path / "shot.png"
    screenshot.write_bytes(b"\x89PNG")

    def _fake_extract(*args, **kwargs):
        return _grid_2x2()

    def _failing_persist(*args, **kwargs):
        # Simulates the database being unavailable at persist time.
        raise RuntimeError("DB down")

    monkeypatch.setattr("core.llm.extract_grid_from_image", _fake_extract)
    monkeypatch.setattr(vwb_db, "vwb_app_context", mem_app.app_context)
    monkeypatch.setattr(vwb_db, "persist_extracted_dossier", _failing_persist)

    state = {
        "last_screenshot": str(screenshot),
        "variables": {},
        "replay_id": "rep-4",
    }
    action = {"type": "extract_dossier", "parameters": {}}

    # Reaching the assertion at all proves that nothing was raised.
    result = replay_engine._handle_extract_dossier_action(action, state, "sess-boom")
    assert result is False
@pytest.mark.unit
def test_extract_dossier_declared_in_action_type_sets():
    """``extract_dossier`` is listed in both action-type collections."""
    for collection_name in ("_ALLOWED_ACTION_TYPES", "_SERVER_SIDE_ACTION_TYPES"):
        declared_types = getattr(replay_engine, collection_name)
        assert "extract_dossier" in declared_types