feat(extraction): modèle DB dossier patient extrait (Job/Table/Field)
ExtractionJob -> ExtractedTable -> ExtractedField (SQLAlchemy, cascade), avec preuve par cellule (bbox + confidence) réutilisant la sémantique VWBEvidence, et statut dossier needs_review|complete. Brique 2 de la verticale extraction. Documenté : ce canal conserve les données patient EN CLAIR (≠ canal apprentissage anonymisé) — aucune anonymisation ne doit cibler ces colonnes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,70 @@ class ExecutionStep(db.Model):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Extraction — "extracted patient record" (brick 2)
#
# ⚠️ EXTRACTION CHANNEL ≠ learning channel. These tables keep the REAL
# patient data (patient_ref, ExtractedField.value): that is the point,
# building the record. They must NOT be anonymized/tokenized (unlike the
# learning channel, cf. pii_sanitizer). No PII sanitization call may target
# these columns.
#
# Evidence semantics reused from contracts/evidence.py (VWBEvidence):
# screenshot_ref ≈ screenshot, screen_bbox/bbox ≈ highlight_box, confidence
# ≈ confidence_score, created_at ≈ timestamp.
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ExtractionJob(db.Model):
    """Extracted patient record — root of one extraction session.

    Parent of the ``ExtractedTable`` rows exposed through ``tables``. The
    relationship is declared with ``cascade='all, delete-orphan'``, so the
    child tables follow the job through ORM session operations.
    """

    __tablename__ = 'extraction_jobs'

    id = db.Column(db.String(64), primary_key=True)
    # Patient data kept in CLEAR TEXT on purpose (extraction channel).
    patient_ref = db.Column(db.String(255), nullable=True)
    source_session_id = db.Column(db.String(64), nullable=True)
    # Column default: naive UTC timestamp produced by datetime.utcnow.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # status: 'needs_review' (human review required) | 'complete' (validated)
    status = db.Column(db.String(32), default='needs_review')

    # lazy='dynamic' makes ``tables`` a query object (supports .count(), .first()).
    tables = db.relationship('ExtractedTable', backref='job', lazy='dynamic',
                             cascade='all, delete-orphan')

    def __repr__(self):
        """Return a short debug representation holding the job id and status."""
        return f'<ExtractionJob {self.id}: {self.status}>'
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractedTable(db.Model):
    """Table extracted from a screen (evidence: screenshot_ref + screen_bbox).

    Belongs to one ``extraction_jobs`` row through ``job_id`` and is the
    parent of the ``ExtractedField`` rows exposed through ``fields``.
    """

    __tablename__ = 'extracted_tables'

    id = db.Column(db.String(64), primary_key=True)
    job_id = db.Column(db.String(64), db.ForeignKey('extraction_jobs.id'), nullable=False)
    screen_bbox = db.Column(db.JSON, nullable=True)  # {x, y, width, height}
    screenshot_ref = db.Column(db.String(512), nullable=True)

    # lazy='dynamic' makes ``fields`` a query object (supports .count(), .first()).
    fields = db.relationship('ExtractedField', backref='table', lazy='dynamic',
                             cascade='all, delete-orphan')

    def __repr__(self):
        """Return a short debug representation holding the table id."""
        return f'<ExtractedTable {self.id}>'
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractedField(db.Model):
    """Extracted cell (patient data in CLEAR TEXT) + bbox/confidence evidence.

    Belongs to one ``extracted_tables`` row through ``table_id``.
    """

    __tablename__ = 'extracted_fields'

    id = db.Column(db.String(64), primary_key=True)
    table_id = db.Column(db.String(64), db.ForeignKey('extracted_tables.id'), nullable=False)
    row = db.Column(db.Integer, nullable=True)
    col = db.Column(db.Integer, nullable=True)
    # Patient value kept in CLEAR TEXT on purpose (extraction channel).
    value = db.Column(db.Text, nullable=True)
    bbox = db.Column(db.JSON, nullable=True)  # {x, y, width, height}
    confidence = db.Column(db.Float, nullable=True)

    def __repr__(self):
        """Return a short debug representation holding the id and row/col position."""
        return f'<ExtractedField {self.id}: r{self.row}c{self.col}>'
|
||||||
|
|
||||||
|
|
||||||
# Session active (en mémoire, pas en DB)
|
# Session active (en mémoire, pas en DB)
|
||||||
class SessionState:
|
class SessionState:
|
||||||
"""État de la session utilisateur (en mémoire)"""
|
"""État de la session utilisateur (en mémoire)"""
|
||||||
|
|||||||
@@ -0,0 +1,124 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
TDD test — Extraction (brick 2): the "extracted patient record" model.

Goal: validate the 3 extraction business models (absent before this brick):
    ExtractionJob → ExtractedTable → ExtractedField
with their relationships, cascade, and `status` ∈ {complete, needs_review}.

⚠️ EXTRACTION CHANNEL ≠ learning channel: here the **real patient data** is
kept (the goal is to build the record). No anonymization.
The test therefore stores a clear-text patient value and checks that it is
returned unchanged.

Isolation (same pattern as test_import_core_workflow_to_db.py):
  - no full Flask app (`app.py`), no socketio/blueprints;
  - shared `db` (`db.models.db`) bound to an **in-memory** SQLite.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
# Derive the backend directory and the repository root from this file's
# resolved location, then prepend both to sys.path so the imports below resolve.
_BACKEND = Path(__file__).resolve().parent.parent.parent  # .../visual_workflow_builder/backend
_ROOT = _BACKEND.parent.parent  # .../rpa_vision_v3
for p in (str(_ROOT), str(_BACKEND)):
    # Only insert directories that are not already on sys.path (no duplicates).
    if p not in sys.path:
        sys.path.insert(0, p)
|
||||||
|
|
||||||
|
from db.models import db # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def db_app():
    """Minimal Flask app bound to an in-memory SQLite database, schema created.

    Yields the app while its application context is still active; after the
    test, removes the session and drops all tables.
    """
    app = Flask("test_extraction_models")
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    db.init_app(app)
    with app.app_context():
        db.create_all()
        yield app
        # Teardown stays inside the app context: session and metadata
        # operations need it.
        db.session.remove()
        db.drop_all()
|
||||||
|
|
||||||
|
|
||||||
|
def test_extraction_job_table_field_chain(db_app):
    """Full Job → Table → Field chain: relationships + default status."""
    from db.models import ExtractionJob, ExtractedTable, ExtractedField

    with db_app.app_context():
        job = ExtractionJob(
            id="job_001",
            patient_ref="MOREL Catherine",  # patient data in CLEAR TEXT (extraction channel)
            source_session_id="sess_extract_001",
        )

        table = ExtractedTable(
            id="tbl_001",
            job=job,
            screen_bbox={"x": 10, "y": 20, "width": 300, "height": 120},
            screenshot_ref="data/extract/sess_extract_001/screen_0.png",
        )
        field = ExtractedField(
            id="fld_001",
            table=table,
            row=0,
            col=1,
            value="1975-04-12",
            bbox={"x": 110, "y": 22, "width": 80, "height": 18},
            confidence=0.94,
        )

        # Only the job is added explicitly; table and field are attached to it
        # through the ``job=`` / ``table=`` backrefs.
        db.session.add(job)
        db.session.commit()

        # Default status applied at INSERT = needs_review (human review required)
        assert job.status == "needs_review"

        # Downward relationships
        assert job.tables.count() == 1
        assert job.tables.first().fields.count() == 1

        # Upward relationships
        f = ExtractedField.query.get("fld_001")
        assert f.table.job.patient_ref == "MOREL Catherine"  # patient kept in clear text
        assert f.value == "1975-04-12"
        assert f.bbox["width"] == 80
        assert f.confidence == pytest.approx(0.94)
        assert f.table.screen_bbox["height"] == 120
|
||||||
|
|
||||||
|
|
||||||
|
def test_status_complete_is_accepted(db_app):
    """`status` accepts 'complete' (validated extraction)."""
    from db.models import ExtractionJob

    with db_app.app_context():
        job = ExtractionJob(id="job_ok", patient_ref="DUPONT Jean", status="complete")
        db.session.add(job)
        db.session.commit()

        assert ExtractionJob.query.get("job_ok").status == "complete"
        # created_at was not passed explicitly: it must come from the column default.
        assert job.created_at is not None and isinstance(job.created_at, datetime)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cascade_delete_removes_children(db_app):
    """Deleting the Job deletes its tables + fields (cascade, no orphans)."""
    from db.models import ExtractionJob, ExtractedTable, ExtractedField

    with db_app.app_context():
        job = ExtractionJob(id="job_del", patient_ref="X")
        table = ExtractedTable(id="tbl_del", job=job, screen_bbox={}, screenshot_ref="s.png")
        # The field is not bound to a name: it is attached via ``table=``.
        ExtractedField(id="fld_del", table=table, row=0, col=0, value="v",
                       bbox={}, confidence=0.5)
        db.session.add(job)
        db.session.commit()

        db.session.delete(job)
        db.session.commit()

        assert ExtractionJob.query.count() == 0
        assert ExtractedTable.query.count() == 0
        assert ExtractedField.query.count() == 0
|
||||||
Reference in New Issue
Block a user