fix: filtre bruit Trackare — antécédents parasites + répétitions DAS

- das_filter: regex anti-répétition gère les espaces entre mots concaténés
  ("VentilationVentilation Ventilation..." désormais rejeté)
- cim10_extractor: regex antécédents s'arrête à "Signes Vitaux" (ne capture
  plus le tableau de surveillance)
- Nouveau _is_valid_antecedent() filtre noms de service, mots de surveillance
  isolés, infos admin (RPPS), répétitions, Mode de vie
- 28 nouveaux tests (TestIsValidAntecedent + das_filter repetition)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-18 19:20:50 +01:00
parent f7d87f2602
commit fe22c0f0f5
4 changed files with 145 additions and 18 deletions

View File

@@ -68,6 +68,15 @@ class TestIsValidDiagnosticText:
def test_reject_concatenated_words_long(self):
    """The same word glued to itself three times, no separators, is rejected."""
    glued = "ventilationventilationventilation"
    assert not is_valid_diagnostic_text(glued)
def test_reject_concatenated_with_spaces(self):
    """Runs of a glued word separated by a space (concat + spaces) are rejected."""
    mixed = "VentilationVentilationVentilation VentilationVentilationVentilationVentilationVentilationVentilationVentilationVentilation"
    assert not is_valid_diagnostic_text(mixed)
def test_reject_concatenated_with_trailing_space(self):
    """A glued repetition followed by a trailing space is still rejected."""
    # The trailing space inside the literal is the point of this test.
    padded = "ventilationventilation "
    assert not is_valid_diagnostic_text(padded)
def test_reject_repeated_words(self):
    """One word repeated with spaces (first occurrence capitalised) is rejected."""
    repeated = "Spontanée spontanée spontanée spontanée"
    assert not is_valid_diagnostic_text(repeated)

View File

@@ -7,6 +7,7 @@ from src.medical.cim10_extractor import (
extract_medical_info,
_lookup_cim10,
_is_abnormal,
_is_valid_antecedent,
)
from src.medical.cim10_dict import normalize_text, load_dict, lookup, reset_cache
from src.extraction.document_classifier import classify, classify_with_confidence
@@ -430,6 +431,90 @@ class TestTraitementEdgeCases:
assert "7 jours" in ttt[0].posologie
class TestIsValidAntecedent:
    """Tests for the filter that rejects spurious Trackare antecedents."""

    # --- Genuine antecedents (accepted) ---

    def test_accept_syndrome(self):
        """A plain syndrome description is accepted."""
        text = "Syndrome anxio depressif"
        assert _is_valid_antecedent(text)

    def test_accept_fracture(self):
        """A fracture description that includes a year is accepted."""
        text = "fracture des deux humérus en 2017"
        assert _is_valid_antecedent(text)

    def test_accept_hta_diabete(self):
        """A comma-separated list of conditions is accepted."""
        text = "HTA, diabète type 2"
        assert _is_valid_antecedent(text)

    def test_accept_bilan_neurologique(self):
        """A 'label: finding' style entry is accepted."""
        text = "Bilan neurologique: IRMc leucopathie vasculaire"
        assert _is_valid_antecedent(text)

    # --- Trackare surveillance noise (rejected) ---

    def test_reject_ventilation_concat(self):
        """Glued repetitions of 'Ventilation' split by a space are rejected."""
        text = "VentilationVentilationVentilation VentilationVentilationVentilation"
        assert not _is_valid_antecedent(text)

    def test_reject_spontanee_repeated(self):
        """The word 'spontanée' repeated five times is rejected."""
        text = "spontanée spontanée spontanée spontanée spontanée"
        assert not _is_valid_antecedent(text)

    def test_reject_air_repeated(self):
        """The word 'Air' repeated seven times is rejected."""
        text = "Air Air Air Air Air Air Air"
        assert not _is_valid_antecedent(text)

    def test_reject_ambiant_repeated(self):
        """The word 'ambiant' repeated four times is rejected."""
        text = "ambiant ambiant ambiant ambiant"
        assert not _is_valid_antecedent(text)

    def test_reject_en_repeated(self):
        """The token 'EN' repeated four times is rejected."""
        text = "EN EN EN EN"
        assert not _is_valid_antecedent(text)

    def test_reject_moyenne_single(self):
        """The lone word 'Moyenne' is rejected."""
        text = "Moyenne"
        assert not _is_valid_antecedent(text)

    def test_reject_ventilation_single(self):
        """The lone word 'Ventilation' is rejected."""
        text = "Ventilation"
        assert not _is_valid_antecedent(text)

    def test_reject_echelle_single(self):
        """The lone word 'Echelle' is rejected."""
        text = "Echelle"
        assert not _is_valid_antecedent(text)

    def test_reject_glycemie_single(self):
        """The lone word 'Glycémie' is rejected."""
        text = "Glycémie"
        assert not _is_valid_antecedent(text)

    def test_reject_capillaire_single(self):
        """The lone word 'capillaire' is rejected."""
        text = "capillaire"
        assert not _is_valid_antecedent(text)

    def test_reject_gauche_single(self):
        """The lone word 'Gauche' is rejected."""
        text = "Gauche"
        assert not _is_valid_antecedent(text)

    # --- Administrative noise (rejected) ---

    def test_reject_service_name_caps(self):
        """An all-caps, truncated service name is rejected."""
        text = "MEDECINE INTERNE ET"
        assert not _is_valid_antecedent(text)

    def test_reject_immunologie_caps(self):
        """An all-caps service name is rejected."""
        text = "IMMUNOLOGIE CLINIQUE"
        assert not _is_valid_antecedent(text)

    def test_reject_rpps(self):
        """An 'N° RPPS' administrative line is rejected."""
        text = "N° RPPS [RPPS_7]"
        assert not _is_valid_antecedent(text)

    def test_reject_medecin_hospitalier(self):
        """A '[MEDECIN] Hospitalier' line is rejected."""
        text = "[MEDECIN] Hospitalier"
        assert not _is_valid_antecedent(text)

    def test_reject_mode_de_vie(self):
        """A 'Mode de vie' line is rejected."""
        text = "Mode de vie : divorcée, une fille"
        assert not _is_valid_antecedent(text)

    def test_reject_texte_libre(self):
        """The '(texte libre)' placeholder is rejected."""
        text = "(texte libre)"
        assert not _is_valid_antecedent(text)

    # --- Edge cases ---

    def test_reject_too_short(self):
        """A very short fragment ('de Bo') is rejected."""
        text = "de Bo"
        assert not _is_valid_antecedent(text)

    def test_reject_starts_with_digit(self):
        """A row of numeric values starting with a digit is rejected."""
        text = "97,00 100,00 98,00"
        assert not _is_valid_antecedent(text)

    def test_reject_empty(self):
        """The empty string is rejected."""
        text = ""
        assert not _is_valid_antecedent(text)
class TestClassifierConfidence:
"""Tests pour classify_with_confidence."""