feat: pipeline T2A - anonymisation, extraction CIM-10 et intégration edsnlp

Pipeline complet de traitement de documents médicaux PDF : - Extraction texte (pdfplumber) et classification (Trackare/CRH) - Anonymisation multi-couche (regex + NER CamemBERT + sweep) - Extraction médicale CIM-10 hybride : edsnlp (AP-HP) enrichit les diagnostics, médicaments (codes ATC via Romedi) et négation, avec fallback regex pour les patterns spécifiques - Fix sentencepiece pinné à <0.2.0 pour compatibilité CamemBERT Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 15:24:12 +01:00
commit 4a12cd2676
25 changed files with 7592 additions and 0 deletions
--- a/src/medical/__init__.py
+++ b/src/medical/__init__.py
--- a/src/medical/cim10_extractor.py
+++ b/src/medical/cim10_extractor.py
@@ -0,0 +1,606 @@
+"""Extraction d'informations médicales structurées pour le codage CIM-10."""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime
+from typing import Optional
+
+from ..config import (
+    ActeCCAM,
+    BiologieCle,
+    Diagnostic,
+    DossierMedical,
+    Imagerie,
+    Sejour,
+    Traitement,
+)
+
+try:
+    from .edsnlp_pipeline import EdsnlpResult
+except ImportError:
+    EdsnlpResult = None  # type: ignore[assignment,misc]
+
+# Map of frequent diagnosis labels (lower-case French, with and without
+# accents) to a suggested ICD-10 (CIM-10) code.
+# NOTE: the lookups in this module walk this dict in insertion order and return
+# the first key found as a substring of the text, so the more specific labels
+# must stay ahead of their generic variants (e.g. "pancréatite aiguë biliaire"
+# before "pancréatite").
+CIM10_MAP: dict[str, str] = {
+    # Pancreatitis
+    "pancréatite aiguë biliaire": "K85.1",
+    "pancréatite aigue biliaire": "K85.1",
+    "pancréatite aiguë lithiasique": "K85.1",
+    "pancréatite aigue lithiasique": "K85.1",
+    "pancréatite aiguë": "K85.9",
+    "pancréatite aigue": "K85.9",
+    "pancréatite": "K85.9",
+    # Biliary lithiasis
+    "lithiase cholédoque": "K80.5",
+    "lithiase du cholédoque": "K80.5",
+    "calcul des canaux biliaires": "K80.5",
+    "lithiase vésiculaire": "K80.2",
+    "lithiases vésiculaires": "K80.2",
+    "vésicule lithiasique": "K80.2",
+    "colique hépatique": "K80.2",
+    # Cholecystitis / cholangitis
+    "cholécystite aiguë": "K81.0",
+    "cholecystite aigue": "K81.0",
+    "angiocholite": "K83.0",
+    # Obesity
+    "obésité": "E66.0",
+    "obesite": "E66.0",
+    "surpoids": "E66.0",
+    # Drug reactions
+    "éruption médicamenteuse": "L27.0",
+    "eruption medicamenteuse": "L27.0",
+    "éruption cutanée médicamenteuse": "L27.0",
+    "toxidermie": "L27.0",
+    "réaction au tramadol": "L27.0",
+    "allergie médicamenteuse": "T88.7",
+    # Pain
+    "douleur abdominale": "R10.4",
+    "douleur hypochondre droit": "R10.1",
+    # Jaundice
+    "ictère": "R17",
+    "jaunisse": "R17",
+    # Arterial hypertension
+    "hypertension artérielle": "I10",
+    "hta": "I10",
+    # Diabetes
+    "diabète type 2": "E11.9",
+    "diabète de type 2": "E11.9",
+    "diabète type 1": "E10.9",
+}
+
+# Map of procedure labels (lower-case French, with and without accents or
+# ligatures) to a suggested CCAM procedure code.
+CCAM_MAP: dict[str, str] = {
+    "cholécystectomie": "HMFC004",
+    "cholecystectomie": "HMFC004",
+    "cholécystectomie par cœlioscopie": "HMFC004",
+    "cholecystectomie par coelioscopie": "HMFC004",
+    "cholangiographie": "HHHE002",
+    "cholangiographie peropératoire": "HHHE002",
+    "cpre": "HHHE002",
+    "sphinctérotomie endoscopique": "HHHE003",
+    "scanner abdominal": "ZCQK002",
+    "tdm abdominal": "ZCQK002",
+    "échographie abdominale": "ZCQJ001",
+    "echo abdominale": "ZCQJ001",
+    "irm abdominale": "ZCQN001",
+}
+
+
+def extract_medical_info(
+    parsed_data: dict,
+    anonymized_text: str,
+    edsnlp_result: Optional[EdsnlpResult] = None,
+) -> DossierMedical:
+    """Build a ``DossierMedical`` from the parser output and the anonymized text.
+
+    Args:
+        parsed_data: Dictionary produced by the document parser; its ``"type"``
+            entry becomes the document type (empty string when absent).
+        anonymized_text: Anonymized document text scanned by the extractors.
+        edsnlp_result: Optional edsnlp analysis forwarded to the diagnosis,
+            treatment and complication extractors.
+
+    Returns:
+        The record filled in by every extraction step.
+    """
+    record = DossierMedical()
+    record.document_type = parsed_data.get("type", "")
+
+    # Stay and diagnoses come first; every step writes into ``record`` in place.
+    _extract_sejour(parsed_data, record)
+    _extract_diagnostics(parsed_data, anonymized_text, record, edsnlp_result)
+
+    # Procedures, history and discharge treatments.
+    _extract_actes(anonymized_text, record)
+    _extract_antecedents(anonymized_text, record)
+    _extract_traitements(parsed_data, anonymized_text, record, edsnlp_result)
+
+    # Lab results, imaging and complications.
+    _extract_biologie(anonymized_text, record)
+    _extract_imagerie(anonymized_text, record)
+    _extract_complications(anonymized_text, record, edsnlp_result)
+
+    return record
+
+
+def _parse_french_date(value: Optional[str]) -> Optional[datetime]:
+    """Parse a ``DD/MM/YYYY`` value; return ``None`` if it is missing or invalid."""
+    if not value:
+        return None
+    try:
+        return datetime.strptime(value, "%d/%m/%Y")
+    except (ValueError, TypeError):
+        # ValueError: unexpected format; TypeError: the value is not a string.
+        return None
+
+
+def _extract_sejour(parsed: dict, dossier: DossierMedical) -> None:
+    """Fill ``dossier.sejour`` from the parsed patient, stay and vitals data.
+
+    Sets sex, entry/discharge dates and entry mode, then derives the age at
+    admission and the length of stay when the dates parse as ``DD/MM/YYYY``.
+    BMI, weight and height come from the vital signs, falling back to the
+    patient section.
+
+    Args:
+        parsed: Parser output; the ``patient``, ``sejour``, ``urgences`` and
+            ``signes_vitaux`` sections may be missing or ``None``.
+        dossier: Record updated in place.
+    """
+    # ``or {}`` also covers sections that are present but set to None.
+    patient = parsed.get("patient") or {}
+    sejour_data = parsed.get("sejour") or {}
+    urgences = parsed.get("urgences") or {}
+    vitals = parsed.get("signes_vitaux") or {}
+
+    date_entree = sejour_data.get("date_entree")
+    date_sortie = sejour_data.get("date_sortie")
+
+    dossier.sejour = Sejour(
+        sexe=patient.get("sexe"),
+        date_entree=date_entree,
+        date_sortie=date_sortie,
+        mode_entree=urgences.get("mode_entree"),
+    )
+
+    # Each date is parsed once; an unparsable date only disables the values
+    # that depend on it.
+    dob_dt = _parse_french_date(patient.get("date_naissance"))
+    entree_dt = _parse_french_date(date_entree)
+    sortie_dt = _parse_french_date(date_sortie)
+
+    # Age at admission: subtract one year if the birthday has not occurred yet.
+    if dob_dt is not None and entree_dt is not None:
+        age = entree_dt.year - dob_dt.year
+        if (entree_dt.month, entree_dt.day) < (dob_dt.month, dob_dt.day):
+            age -= 1
+        dossier.sejour.age = age
+
+    # Length of stay in days.
+    if entree_dt is not None and sortie_dt is not None:
+        dossier.sejour.duree_sejour = (sortie_dt - entree_dt).days
+
+    # BMI, weight, height: vital signs take precedence over the patient section.
+    for attribute, key in (("imc", "imc"), ("poids", "poids_kg"), ("taille", "taille_cm")):
+        value = vitals.get(key) or patient.get(key)
+        if value:
+            setattr(dossier.sejour, attribute, value)
+
+
+def _extract_diagnostics(
+    parsed: dict,
+    text: str,
+    dossier: DossierMedical,
+    edsnlp_result: Optional[EdsnlpResult] = None,
+) -> None:
+    """Extract the principal diagnosis and the associated diagnoses.
+
+    Sources, in priority order: diagnoses already coded in ``parsed``, the
+    regex/dictionary fallback applied to the text and its "Au total"
+    conclusion, then the edsnlp ICD-10 entities that are neither negated nor
+    hypothetical.
+
+    Args:
+        parsed: Parser output; ``parsed["diagnostics"]`` is a list of dicts
+            read through the ``libelle``, ``code_cim10`` and ``type`` keys.
+        text: Anonymized document text.
+        dossier: Record updated in place.
+        edsnlp_result: Optional edsnlp analysis used for enrichment.
+    """
+    text_lower = text.lower()
+
+    # Diagnoses coded upstream take priority over anything found in the text.
+    for diag in parsed.get("diagnostics") or []:
+        d = Diagnostic(
+            texte=diag.get("libelle", ""),
+            cim10_suggestion=diag.get("code_cim10"),
+        )
+        # ``or ""`` keeps an explicit ``"type": None`` from crashing ``.lower()``.
+        if (diag.get("type") or "").lower() == "principal":
+            dossier.diagnostic_principal = d
+        else:
+            dossier.diagnostics_associes.append(d)
+
+    # "Au total:" conclusion. It stops at the next known section header or at
+    # the end of the text; the end-of-text alternative must not require a
+    # newline, otherwise a conclusion closing the document is never captured.
+    conclusion = ""
+    m = re.search(
+        r"Au total\s*[:：]?\s*(.*?)(?=\n\s*(?:Devenir|TTT|Sortie)|\s*\Z)",
+        text,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if m:
+        conclusion = m.group(1).strip()
+
+    # Affirmed edsnlp entities, keyed by code (insertion order is kept).
+    edsnlp_codes: dict[str, str] = {}
+    if edsnlp_result:
+        for ent in edsnlp_result.cim10_entities:
+            if not ent.negation and not ent.hypothese:
+                edsnlp_codes[ent.code] = ent.texte
+
+    # No coded principal diagnosis: look for one in the text.
+    if not dossier.diagnostic_principal:
+        # The regex fallback goes first; the first edsnlp entity is the last resort.
+        dp = _find_diagnostic_principal(text_lower, conclusion)
+        if dp:
+            dossier.diagnostic_principal = dp
+        elif edsnlp_codes:
+            code, texte = next(iter(edsnlp_codes.items()))
+            dossier.diagnostic_principal = Diagnostic(
+                texte=texte.capitalize(), cim10_suggestion=code,
+            )
+
+    # Associated diagnoses found by the regex rules.
+    das = _find_diagnostics_associes(text_lower, conclusion, dossier)
+    dossier.diagnostics_associes.extend(das)
+
+    # Associated diagnoses from edsnlp, skipping codes that are already present.
+    if edsnlp_result:
+        existing_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
+        if dossier.diagnostic_principal:
+            existing_codes.add(dossier.diagnostic_principal.cim10_suggestion)
+
+        for ent in edsnlp_result.cim10_entities:
+            if ent.negation or ent.hypothese:
+                continue
+            if ent.code not in existing_codes:
+                dossier.diagnostics_associes.append(Diagnostic(
+                    texte=ent.texte.capitalize(),
+                    cim10_suggestion=ent.code,
+                ))
+                existing_codes.add(ent.code)
+
+
+def _find_diagnostic_principal(text_lower: str, conclusion: str) -> Diagnostic | None:
+    """Find the principal diagnosis in the conclusion or, failing that, the text.
+
+    Args:
+        text_lower: Lower-cased document text.
+        conclusion: Text of the "Au total" conclusion (may be empty).
+
+    Returns:
+        A ``Diagnostic`` with its suggested ICD-10 code, or ``None``.
+    """
+    conclusion_lower = conclusion.lower()
+
+    # Conclusion first: the first CIM10_MAP label contained in it wins.
+    for terme, code in CIM10_MAP.items():
+        if terme in conclusion_lower:
+            return Diagnostic(texte=terme.capitalize(), cim10_suggestion=code)
+
+    # Common principal-diagnosis patterns, most specific first. Each pattern
+    # carries its own code: looking the matched text up again by substring
+    # loses the biliary code for wordings such as "pancréatite aiguë d'origine
+    # lithiasique".
+    dp_patterns = (
+        (r"pancréatite\s+aigu[eë]\s+(?:d'origine\s+)?lithiasique", "K85.1"),
+        (r"pancréatite\s+aigu[eë]\s+biliaire", "K85.1"),
+        (r"pancréatite\s+aigu[eë]", "K85.9"),
+    )
+    for pat, code in dp_patterns:
+        m = re.search(pat, text_lower)
+        if m:
+            # Collapse whitespace runs (``\s+`` may have spanned a line break).
+            matched = " ".join(m.group(0).split())
+            return Diagnostic(texte=matched.capitalize(), cim10_suggestion=code)
+
+    return None
+
+
+def _find_diagnostics_associes(
+    text_lower: str, conclusion: str, dossier: DossierMedical
+) -> list[Diagnostic]:
+    """Find associated diagnoses with the regex rules.
+
+    Args:
+        text_lower: Lower-cased document text.
+        conclusion: Conclusion text; not used by the current rules.
+        dossier: Record whose existing diagnoses are used to avoid duplicate
+            codes; it is not modified here.
+
+    Returns:
+        New ``Diagnostic`` objects, in rule order, whose code is not already
+        carried by the record.
+    """
+    das: list[Diagnostic] = []
+    existing_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
+    if dossier.diagnostic_principal:
+        existing_codes.add(dossier.diagnostic_principal.cim10_suggestion)
+
+    def add_once(texte: str, code: str) -> None:
+        # Record a diagnosis unless its code is already known.
+        if code not in existing_codes:
+            das.append(Diagnostic(texte=texte, cim10_suggestion=code))
+            existing_codes.add(code)
+
+    # Common bile duct lithiasis
+    if re.search(r"lithiase\s+(?:du\s+)?(?:bas\s+)?cholédoque", text_lower):
+        add_once("Lithiase du cholédoque", "K80.5")
+
+    # Drug eruption (accented and unaccented spellings)
+    if re.search(
+        r"[ée]ruption\s+cutan[ée]e|toxidermie|réaction\s+au\s+tramadol", text_lower
+    ):
+        add_once("Éruption cutanée médicamenteuse", "L27.0")
+
+    # Obesity (BMI >= 30). The decimal part is optional so that "IMC 32" is
+    # recognised; the trailing guard keeps longer numbers (e.g. a year) out.
+    m = re.search(r"imc\s*[:=]?\s*(\d{2,3}(?:[.,]\d+)?)(?!\d)", text_lower)
+    if m:
+        imc_val = float(m.group(1).replace(",", "."))
+        if imc_val >= 30:
+            add_once(f"Obésité (IMC {imc_val})", "E66.0")
+
+    # Gallbladder lithiasis
+    if re.search(r"vésicule\s+lithiasique|lithiases?\s+vésiculaire", text_lower):
+        add_once("Lithiase vésiculaire", "K80.2")
+
+    return das
+
+
+def _extract_actes(text: str, dossier: DossierMedical) -> None:
+    """Detect procedures in the text and append them with a suggested CCAM code.
+
+    Args:
+        text: Anonymized document text.
+        dossier: Record whose ``actes_ccam`` list is extended in place.
+    """
+    text_lower = text.lower()
+
+    # Cholecystectomy, labelled as laparoscopic when the text says so. The
+    # approach is matched as "cœlioscopie" (ligature) or "coelioscopie" /
+    # "coalioscopie"; a character class followed by [ea] cannot match the
+    # ligature spelling.
+    cholecystectomy = r"chol[ée]cystectomie"
+    if re.search(cholecystectomy, text_lower):
+        laparoscopic = re.search(
+            rf"{cholecystectomy}\s+par\s+c(?:œ|o[ea])lioscopie", text_lower
+        )
+        dossier.actes_ccam.append(ActeCCAM(
+            texte=(
+                "Cholécystectomie par cœlioscopie" if laparoscopic
+                else "Cholécystectomie"
+            ),
+            code_ccam_suggestion="HMFC004",
+            date=_find_act_date(text, cholecystectomy),
+        ))
+
+    # Cholangiography
+    if re.search(r"cholangiographie", text_lower):
+        dossier.actes_ccam.append(ActeCCAM(
+            texte="Cholangiographie peropératoire",
+            code_ccam_suggestion="HHHE002",
+            date=_find_act_date(text, r"cholangiographie"),
+        ))
+
+    # CT scan: the date lookup uses the same keywords as the detection.
+    ct_scan = r"(?:tdm|scanner|tomodensitométrie)"
+    if re.search(ct_scan, text_lower):
+        dossier.actes_ccam.append(ActeCCAM(
+            texte="TDM abdominal",
+            code_ccam_suggestion="ZCQK002",
+            date=_find_act_date(text, ct_scan),
+        ))
+
+
+def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
+    """Collect the lines of the "Antécédents" section as medical history.
+
+    Lines that are too short, start with a digit or contain a known
+    non-history marker (vital signs, form labels, ...) are dropped.
+
+    Args:
+        text: Anonymized document text.
+        dossier: Record whose ``antecedents`` list is extended in place.
+    """
+    section = re.search(
+        r"Antécédents?\s*[:：]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[:：]|Allergie|Histoire de la maladie|Examen clinique|\n\n))",
+        text,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if not section:
+        return
+
+    # Case-sensitive substrings that mark a line as noise.
+    noise_markers = (
+        "Item de",
+        "Température",
+        "Signes Vitaux",
+        "Pouls",
+        "Type de note",
+        "Aucune donnée",
+        "renseignée",
+        "habitudes de vie",
+        "Systolique",
+        "Diastolique",
+        "Saturation",
+    )
+
+    for raw_line in section.group(1).strip().split("\n"):
+        entry = raw_line.strip().lstrip("- •")
+        # Anything of 5 characters or fewer is dropped (covers "" and "0").
+        if len(entry) <= 5:
+            continue
+        if re.match(r"^\d", entry):
+            continue
+        if "surveillance" in entry.lower():
+            continue
+        if any(marker in entry for marker in noise_markers):
+            continue
+        dossier.antecedents.append(entry)
+
+
+def _extract_traitements(
+    parsed: dict,
+    text: str,
+    dossier: DossierMedical,
+    edsnlp_result: Optional[EdsnlpResult] = None,
+) -> None:
+    """Extract the discharge treatments into ``dossier.traitements_sortie``.
+
+    The "TTT de sortie" / "Traitement de sortie" section is read first (at
+    most its first 10 lines). If it yields nothing, lines flagged
+    "Presc. de Sortie" are used. ATC codes come from the non-negated edsnlp
+    drug entities when available.
+
+    Args:
+        parsed: Parser output (not read by this function).
+        text: Anonymized document text.
+        dossier: Record updated in place.
+        edsnlp_result: Optional edsnlp analysis providing drug/ATC pairs.
+    """
+    # Lower-cased drug text -> ATC code, from affirmed edsnlp entities only.
+    atc_by_name: dict[str, str] = {}
+    if edsnlp_result:
+        atc_by_name = {
+            drug.texte.lower(): drug.code_atc
+            for drug in edsnlp_result.drug_entities
+            if not drug.negation and drug.code_atc
+        }
+
+    # Discharge-treatment section of the text.
+    section = re.search(
+        r"(?:TTT|Traitement)\s+de\s+sortie\s*[:：]?\s*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement|Patient:|Episode|Le \d{2}/\d{2}|\n\n)|$)",
+        text,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if section:
+        for raw_line in section.group(1).strip().split("\n")[:10]:
+            entry = raw_line.strip().lstrip("- •")
+            if len(entry) <= 2:
+                continue
+            # A footer or non-drug line ends the section.
+            if re.match(r"^(Patient|Episode|Le \d|Page|V\d)", entry):
+                break
+            # Split the drug name from its dosage when a dosage cue is found.
+            dosage_hit = re.search(
+                r"\s+(si besoin|matin|soir|midi|\d+\s*(?:mg|cp|gel).*)",
+                entry,
+                re.IGNORECASE,
+            )
+            if dosage_hit:
+                name = entry[:dosage_hit.start()].strip()
+                dosage = dosage_hit.group(1).strip()
+            else:
+                name, dosage = entry, None
+            dossier.traitements_sortie.append(Traitement(
+                medicament=name,
+                posologie=dosage,
+                code_atc=_match_drug_atc(name, atc_by_name),
+            ))
+
+    if dossier.traitements_sortie:
+        return
+
+    # Fallback: prescriptions flagged "Presc. de Sortie".
+    for hit in re.finditer(
+        r"([A-ZÉÈÊËÀÂ][A-ZÉÈÊËÀÂ0-9\s\-/%.]+?)(?:\s+\d+\s*(?:mg|G|CPR|GEL))?.*?Presc\.\s*de\s*Sortie",
+        text,
+    ):
+        name = hit.group(1).strip()
+        if len(name) > 3:
+            dossier.traitements_sortie.append(Traitement(
+                medicament=name, code_atc=_match_drug_atc(name, atc_by_name),
+            ))
+
+
+def _match_drug_atc(med_name: str, drug_atc: dict[str, str]) -> Optional[str]:
+    """Look up the ATC code of a drug name in the edsnlp drug index.
+
+    Args:
+        med_name: Drug name as extracted from the document.
+        drug_atc: Lower-cased drug text -> ATC code.
+
+    Returns:
+        The ATC code of the exact match if any, otherwise that of the first
+        index entry contained in the name or containing it; ``None`` when
+        nothing matches or when the name is empty.
+    """
+    med_lower = (med_name or "").lower().strip()
+    # An empty name is a substring of every key and would match anything.
+    if not drug_atc or not med_lower:
+        return None
+    # Exact match
+    if med_lower in drug_atc:
+        return drug_atc[med_lower]
+    # Partial match, in either direction (empty keys are ignored).
+    for drug_text, atc in drug_atc.items():
+        if drug_text and (drug_text in med_lower or med_lower in drug_text):
+            return atc
+    return None
+
+
+def _extract_biologie(text: str, dossier: DossierMedical) -> None:
+    """Extract key lab results into ``dossier.biologie_cle``.
+
+    Only the first occurrence of each test is kept; its value is flagged as
+    abnormal or not by ``_is_abnormal``.
+
+    Args:
+        text: Anonymized document text (matching is case-sensitive).
+        dossier: Record updated in place.
+    """
+    # (pattern, test name). Acronyms are anchored with ``\b`` so that "PAL"
+    # does not match inside a word such as "PRINCIPAL".
+    bio_patterns = (
+        (r"[Ll]ipas[ée]mie\s*(?:[àa=:])?\s*(\d+)", "Lipasémie"),
+        (r"\bCRP\s*[=:à]?\s*(\d+(?:[.,]\d+)?)", "CRP"),
+        (r"\bASAT\s*[=:à]?\s*([\d.,]+)\s*(?:N|U/L)?", "ASAT"),
+        (r"\bALAT\s*[=:à]?\s*([\d.,]+)\s*(?:N|U/L)?", "ALAT"),
+        (r"\bGGT\s*[=:à]?\s*(\d+)\s*(?:U/L)?", "GGT"),
+        (r"\bPAL\s*[=:à]?\s*(\d+)\s*(?:U/L)?", "PAL"),
+        (r"[Bb]ilirubine\s+(?:totale\s+)?[àa=:]\s*(\d+)\s*(?:µmol/L)?", "Bilirubine totale"),
+        # Accept the capitalised form found at the start of a sentence.
+        (r"[Tt]roponine\s+(négative|positive|normale)", "Troponine"),
+    )
+
+    for pattern, test_name in bio_patterns:
+        m = re.search(pattern, text)
+        if not m:
+            continue
+        value = m.group(1)
+        dossier.biologie_cle.append(BiologieCle(
+            test=test_name,
+            valeur=value,
+            anomalie=_is_abnormal(test_name, value),
+        ))
+
+
+def _extract_imagerie(text: str, dossier: DossierMedical) -> None:
+    """Extract CT-scan and ultrasound findings into ``dossier.imagerie``.
+
+    Each finding is the text following "retrouve" / "montre" / "objective"
+    after the imaging keyword, truncated to 500 characters. The CT entry also
+    carries the Balthazar score when one is found anywhere in the text.
+
+    Args:
+        text: Anonymized document text.
+        dossier: Record updated in place.
+    """
+    # CT scan
+    ct_hit = re.search(
+        r"(?:TDM|[Ss]canner|tomodensitométrie).*?(?:retrouve|montre|objective)\s*[:：]?\s*(.*?)(?=\n\s*(?:Cholécystectomie|Au total|Devenir|\n\n))",
+        text,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if ct_hit:
+        # Balthazar score: a number or a letter A-E.
+        balthazar = re.search(r"[Bb]althazar\s*(?:[àa=:])?\s*(\d+|[A-E])", text)
+        dossier.imagerie.append(Imagerie(
+            type="TDM abdominal",
+            conclusion=ct_hit.group(1).strip()[:500],
+            score=f"Balthazar {balthazar.group(1)}" if balthazar else None,
+        ))
+
+    # Ultrasound: the finding runs up to the next blank line.
+    ultrasound_hit = re.search(
+        r"(?:[ée]cho(?:graphie)?)\s*.*?(?:retrouve|montre|objective)\s*[:：]?\s*(.*?)(?=\n\n)",
+        text,
+        re.DOTALL | re.IGNORECASE,
+    )
+    if ultrasound_hit:
+        findings = ultrasound_hit.group(1).strip()
+        dossier.imagerie.append(Imagerie(
+            type="Échographie",
+            conclusion=findings[:500],
+        ))
+
+
+def _extract_complications(
+    text: str,
+    dossier: DossierMedical,
+    edsnlp_result: Optional[EdsnlpResult] = None,
+) -> None:
+    """Append the complications mentioned (and not negated) in the text.
+
+    A term is kept when at least one of its mentions is not directly preceded
+    by a negation cue. When an edsnlp analysis is given, a term overlapping a
+    negated edsnlp entity is skipped first.
+
+    Args:
+        text: Anonymized document text.
+        dossier: Record whose ``complications`` list is extended in place.
+        edsnlp_result: Optional edsnlp analysis providing negated entities.
+    """
+    text_lower = text.lower()
+
+    # Lower-cased texts of the entities edsnlp flagged as negated.
+    edsnlp_negated_terms: set[str] = set()
+    if edsnlp_result:
+        edsnlp_negated_terms = {
+            ent.texte.lower()
+            for ent in edsnlp_result.cim10_entities
+            if ent.negation
+        }
+
+    complication_terms = [
+        "éruption cutanée",
+        "eruption cutanée",
+        "fièvre",
+        "infection",
+        "hémorragie",
+        "hématome",
+        "abcès",
+        "fistule",
+        "iléus",
+        "occlusion",
+    ]
+
+    # Negation cues, including the elided forms ("pas d'infection",
+    # "absence d'hématome") required before a vowel or a mute h.
+    negation_cue = r"\b(?:(?:pas|absence)\s+d(?:e\s+|['’]\s*)|(?:sans|aucune?)\s+)"
+
+    for term in complication_terms:
+        if term not in text_lower:
+            continue
+        # edsnlp negation first
+        if edsnlp_result and _is_negated_by_edsnlp(term, edsnlp_negated_terms):
+            continue
+        # Regex fallback, evaluated per mention: a negated mention must not
+        # hide an affirmed one elsewhere in the document.
+        escaped = re.escape(term)
+        negated_starts = {
+            hit.start(1)
+            for hit in re.finditer(rf"{negation_cue}({escaped})", text_lower)
+        }
+        mentions = (hit.start() for hit in re.finditer(escaped, text_lower))
+        if any(start not in negated_starts for start in mentions):
+            dossier.complications.append(term.capitalize())
+
+
+def _is_negated_by_edsnlp(term: str, negated_terms: set[str]) -> bool:
+    """Tell whether ``term`` overlaps one of the negated edsnlp entity texts.
+
+    The overlap is a substring test in either direction, on the lower-cased
+    term.
+    """
+    needle = term.lower()
+    return any(
+        needle in candidate or candidate in needle
+        for candidate in negated_terms
+    )
+
+
+def _find_act_date(text: str, act_pattern: str) -> str | None:
+    """Find the date attached to a procedure mention.
+
+    Args:
+        text: Document text (matching is case-insensitive and stays on the
+            line of the mention).
+        act_pattern: Regex fragment describing the procedure; it may contain a
+            top-level alternation.
+
+    Returns:
+        The first ``DD/MM`` or ``DD/MM/YYYY`` date following the mention,
+        otherwise a ``DD/MM/YYYY`` date preceding it, otherwise ``None``.
+    """
+    # Group the fragment: an ungrouped ``a|b`` would split the whole expression
+    # and match the bare procedure name without any date.
+    act = rf"(?:{act_pattern})"
+
+    # "act ... [le] DD/MM[/YYYY]". The lookarounds reject digit pairs embedded
+    # in a longer number, e.g. the "20/80" inside a "120/80" blood pressure.
+    date_after = re.search(
+        rf"{act}.*?(?:le\s+)?(?<![\d/])(?P<date>\d{{2}}/\d{{2}}(?:/\d{{4}})?)(?!\d)",
+        text,
+        re.IGNORECASE,
+    )
+    if date_after:
+        return date_after.group("date")
+
+    # Otherwise look for a full date earlier on the same line.
+    date_before = re.search(
+        rf"(?<![\d/])(?P<date>\d{{2}}/\d{{2}}/\d{{4}})(?!\d).*?{act}",
+        text,
+        re.IGNORECASE,
+    )
+    if date_before:
+        return date_before.group("date")
+    return None
+
+
+def _lookup_cim10(text: str) -> str | None:
+    """Return the code of the first ``CIM10_MAP`` label contained in ``text``.
+
+    The text is lower-cased and stripped before the substring test; ``None``
+    is returned when no label matches.
+    """
+    haystack = text.lower().strip()
+    return next(
+        (code for label, code in CIM10_MAP.items() if label in haystack),
+        None,
+    )
+
+
+def _is_abnormal(test: str, value: str) -> bool | None:
+    """Tell whether a lab result is abnormal.
+
+    Args:
+        test: Test name, e.g. ``"CRP"``.
+        value: Result as text: a number (comma or dot decimal separator) or a
+            qualitative word such as ``"négative"``.
+
+    Returns:
+        ``True`` / ``False`` when the value can be judged, ``None`` when the
+        value is not a string, is not understood, or the test has no known
+        reference range.
+    """
+    # A non-string value has neither ``replace`` nor ``lower``; report
+    # "unknown" instead of raising from inside the fallback.
+    if not isinstance(value, str):
+        return None
+
+    cleaned = value.strip()
+    try:
+        val = float(cleaned.replace(",", "."))
+    except ValueError:
+        # Qualitative result
+        lowered = cleaned.lower()
+        if lowered in ("négative", "negative", "normale", "normal"):
+            return False
+        if lowered in ("positive", "positif", "élevée", "elevee"):
+            return True
+        return None
+
+    # Reference ranges as (low, high); a value outside the range is abnormal.
+    normals: dict[str, tuple[float, float]] = {
+        "Lipasémie": (0, 60),
+        "CRP": (0, 5),
+        "ASAT": (0, 40),
+        "ALAT": (0, 40),
+        "GGT": (0, 60),
+        "PAL": (0, 150),
+        "Bilirubine totale": (0, 17),
+    }
+
+    bounds = normals.get(test)
+    if bounds is None:
+        return None
+    lo, hi = bounds
+    return val > hi or val < lo
--- a/src/medical/edsnlp_pipeline.py
+++ b/src/medical/edsnlp_pipeline.py
@@ -0,0 +1,140 @@
+"""Pipeline edsnlp pour l'extraction médicale (CIM-10, médicaments, négation)."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Pipeline built by get_pipeline() and reused afterwards; None until then.
+_nlp = None
+# Cached outcome of the edsnlp import probe in is_available(); None = not probed.
+_available = None
+
+
+@dataclass
+class CIM10Entity:
+    """ICD-10 (CIM-10) entity detected in a text."""
+
+    texte: str  # entity text as found in the document
+    code: str  # ICD-10 code attached to the entity
+    negation: bool = False  # True when the mention is negated
+    hypothese: bool = False  # True when the mention is hypothetical
+
+
+@dataclass
+class DrugEntity:
+    """Drug mention detected in a text."""
+
+    texte: str  # drug text as found in the document
+    code_atc: Optional[str] = None  # ATC code, when one is attached to the entity
+    negation: bool = False  # True when the mention is negated
+
+
+@dataclass
+class DateEntity:
+    """Date mention detected in a text."""
+
+    texte: str  # date text as found in the document
+    value: Optional[str] = None  # string form of the parsed date, when available
+
+
+@dataclass
+class EdsnlpResult:
+    """Entities returned by ``analyze``; every list defaults to empty."""
+
+    cim10_entities: list[CIM10Entity] = field(default_factory=list)  # ICD-10 entities
+    drug_entities: list[DrugEntity] = field(default_factory=list)  # drug mentions
+    date_entities: list[DateEntity] = field(default_factory=list)  # date mentions
+
+
+def is_available() -> bool:
+    """Tell whether edsnlp can be imported.
+
+    The import is attempted once; the outcome is cached in ``_available`` and
+    returned directly on later calls.
+    """
+    global _available
+    if _available is None:
+        try:
+            import edsnlp  # noqa: F401
+        except ImportError:
+            _available = False
+        else:
+            _available = True
+    return _available
+
+
+def get_pipeline():
+    """Return the edsnlp pipeline, building it on first use.
+
+    The pipeline is cached in ``_nlp``; later calls return the same object.
+
+    Raises:
+        RuntimeError: If edsnlp is not installed.
+    """
+    global _nlp
+    if _nlp is None:
+        if not is_available():
+            raise RuntimeError("edsnlp n'est pas installé")
+
+        import edsnlp
+
+        logger.info("Initialisation du pipeline edsnlp...")
+        pipeline = edsnlp.blank("eds")
+
+        # Components are added in this exact order.
+        for component in ("eds.normalizer", "eds.sentences"):
+            pipeline.add_pipe(component)
+        pipeline.add_pipe(
+            "eds.cim10", config={"attr": "NORM", "term_matcher": "simstring"}
+        )
+        pipeline.add_pipe(
+            "eds.drugs", config={"attr": "NORM", "term_matcher": "exact"}
+        )
+        for component in ("eds.negation", "eds.hypothesis", "eds.dates"):
+            pipeline.add_pipe(component)
+
+        _nlp = pipeline
+        logger.info("Pipeline edsnlp initialisé avec succès")
+    return _nlp
+
+
+def analyze(text: str) -> EdsnlpResult:
+    """Run edsnlp on a medical text and collect the detected entities.
+
+    Returns an empty result when edsnlp is unavailable or when the pipeline
+    raises (the error is logged). Otherwise the result holds the ``cim10``
+    entities that carry a code, the ``drug`` entities and the spans stored
+    under ``doc.spans["dates"]``.
+    """
+    outcome = EdsnlpResult()
+
+    if not is_available():
+        return outcome
+
+    try:
+        doc = get_pipeline()(text)
+    except Exception:
+        logger.exception("Erreur lors de l'analyse edsnlp")
+        return outcome
+
+    for ent in doc.ents:
+        # Qualifiers default to False when the extension is absent or None.
+        is_negated = getattr(ent._, "negation", False) or False
+        is_hypothesis = getattr(ent._, "hypothesis", False) or False
+        label = ent.label_
+
+        if label == "cim10":
+            # Entities without a knowledge-base id carry no code: skip them.
+            if ent.kb_id_:
+                outcome.cim10_entities.append(CIM10Entity(
+                    texte=ent.text,
+                    code=ent.kb_id_,
+                    negation=is_negated,
+                    hypothese=is_hypothesis,
+                ))
+        elif label == "drug":
+            outcome.drug_entities.append(DrugEntity(
+                texte=ent.text,
+                code_atc=ent.kb_id_ or None,
+                negation=is_negated,
+            ))
+
+    # Dates
+    for span in doc.spans.get("dates", []):
+        parsed_date = getattr(span._, "date", None)
+        outcome.date_entities.append(DateEntity(
+            texte=span.text,
+            value=None if parsed_date is None else str(parsed_date),
+        ))
+
+    return outcome
+
+
+def reset():
+    """Drop the cached pipeline and availability flag (useful in tests)."""
+    global _nlp, _available
+    _nlp = _available = None