feat(extraction): modèle DB dossier patient extrait (Job/Table/Field)
ExtractionJob -> ExtractedTable -> ExtractedField (SQLAlchemy, cascade), avec preuve par cellule (bbox + confidence) réutilisant la sémantique VWBEvidence, et statut dossier needs_review|complete. Brique 2 de la verticale extraction. Documenté : ce canal conserve les données patient EN CLAIR (≠ canal apprentissage anonymisé) — aucune anonymisation ne doit cibler ces colonnes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test TDD — Extraction (brique 2) : modèle « dossier patient extrait ».
|
||||
|
||||
Objectif : valider les 3 modèles métier d'extraction (absents avant cette brique) :
|
||||
ExtractionJob → ExtractedTable → ExtractedField
|
||||
avec leurs relations, cascade, et le `status` ∈ {complete, needs_review}.
|
||||
|
||||
⚠️ CANAL EXTRACTION ≠ canal apprentissage : ici on conserve les **vraies
|
||||
données patient** (le but est de constituer le dossier). Pas d'anonymisation.
|
||||
Le test pose donc une valeur patient en clair et vérifie qu'elle est restituée
|
||||
telle quelle.
|
||||
|
||||
Isolation (même pattern que test_import_core_workflow_to_db.py) :
|
||||
- pas d'app Flask complète (`app.py`), pas de socketio/blueprints ;
|
||||
- `db` partagé (`db.models.db`) lié à une SQLite **en mémoire**.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
|
||||
# Locate the backend package and the repository root relative to this file so
# the project modules can be imported without installing the project.
_BACKEND = Path(__file__).resolve().parent.parent.parent  # .../visual_workflow_builder/backend
_ROOT = _BACKEND.parent.parent  # .../rpa_vision_v3

# The root is inserted first, so the backend directory ends up ahead of it
# on sys.path. Entries that are already present are left where they are.
for _import_dir in map(str, (_ROOT, _BACKEND)):
    if _import_dir in sys.path:
        continue
    sys.path.insert(0, _import_dir)
|
||||
|
||||
from db.models import db # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture
def db_app():
    """Yield a minimal Flask app bound to an in-memory SQLite database.

    The shared ``db`` object is initialised on a fresh app, the schema is
    created before the test body runs, and the session is removed and the
    schema dropped afterwards. The application context opened here stays
    active for the whole duration of the test.
    """
    flask_app = Flask("test_extraction_models")
    flask_app.config.update(
        SQLALCHEMY_DATABASE_URI="sqlite:///:memory:",
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )
    db.init_app(flask_app)

    with flask_app.app_context():
        db.create_all()
        yield flask_app
        # Teardown: release the session first, then drop the schema.
        db.session.remove()
        db.drop_all()
|
||||
|
||||
|
||||
def test_extraction_job_table_field_chain(db_app):
    """Full Job -> Table -> Field chain: relationships and default status."""
    from db.models import ExtractedField, ExtractedTable, ExtractionJob

    # Patient data is stored in PLAINTEXT on purpose (extraction channel).
    patient_name = "MOREL Catherine"
    table_box = {"x": 10, "y": 20, "width": 300, "height": 120}
    cell_box = {"x": 110, "y": 22, "width": 80, "height": 18}

    with db_app.app_context():
        job = ExtractionJob(
            id="job_001",
            patient_ref=patient_name,
            source_session_id="sess_extract_001",
        )
        table = ExtractedTable(
            id="tbl_001",
            job=job,
            screen_bbox=table_box,
            screenshot_ref="data/extract/sess_extract_001/screen_0.png",
        )
        # Only the job is added to the session below; the table and the cell
        # are attached to it through their relationship arguments.
        cell = ExtractedField(
            id="fld_001",
            table=table,
            row=0,
            col=1,
            value="1975-04-12",
            bbox=cell_box,
            confidence=0.94,
        )

        db.session.add(job)
        db.session.commit()

        # A job created without an explicit status must come back as
        # needs_review (human review required).
        assert job.status == "needs_review"

        # Downward navigation: job -> tables -> fields.
        assert job.tables.count() == 1
        first_table = job.tables.first()
        assert first_table.fields.count() == 1

        # Upward navigation: field -> table -> job.
        fetched = ExtractedField.query.get("fld_001")
        owning_table = fetched.table
        assert owning_table.job.patient_ref == "MOREL Catherine"  # patient kept in plaintext
        assert fetched.value == "1975-04-12"
        assert fetched.bbox["width"] == 80
        assert fetched.confidence == pytest.approx(0.94)
        assert fetched.table.screen_bbox["height"] == 120
|
||||
|
||||
|
||||
def test_status_complete_is_accepted(db_app):
    """`status` accepts 'complete' (validated extraction)."""
    from db.models import ExtractionJob

    with db_app.app_context():
        validated_job = ExtractionJob(
            id="job_ok",
            patient_ref="DUPONT Jean",
            status="complete",
        )
        db.session.add(validated_job)
        db.session.commit()

        reloaded = ExtractionJob.query.get("job_ok")
        assert reloaded.status == "complete"
        # isinstance() already rejects None, so one check covers both
        # "is set" and "is a datetime".
        assert isinstance(validated_job.created_at, datetime)
|
||||
|
||||
|
||||
def test_cascade_delete_removes_children(db_app):
    """Deleting the Job deletes its tables and fields (cascade, no orphans)."""
    from db.models import ExtractedField, ExtractedTable, ExtractionJob

    with db_app.app_context():
        parent_job = ExtractionJob(id="job_del", patient_ref="X")
        child_table = ExtractedTable(
            id="tbl_del",
            job=parent_job,
            screen_bbox={},
            screenshot_ref="s.png",
        )
        # The field is linked to the table via its `table` argument only;
        # no local reference to it is kept.
        ExtractedField(
            id="fld_del",
            table=child_table,
            row=0,
            col=0,
            value="v",
            bbox={},
            confidence=0.5,
        )
        db.session.add(parent_job)
        db.session.commit()

        db.session.delete(parent_job)
        db.session.commit()

        # Nothing may remain at any level of the hierarchy.
        assert ExtractionJob.query.count() == 0
        assert ExtractedTable.query.count() == 0
        assert ExtractedField.query.count() == 0
|
||||
Reference in New Issue
Block a user