{ "analysis_date": "2026-03-02", "global_metrics": { "precision": 0.1897, "recall": 1.0, "f1_score": 0.3189, "true_positives": 1159, "false_positives": 4951, "false_negatives": 0 }, "problems": [ { "priority": "HAUTE", "category": "Propagation globale", "description": "951 faux positifs dus aux détections *_GLOBAL", "types": [ "NOM_GLOBAL", "ETAB_GLOBAL", "TEL_GLOBAL", "ADRESSE_GLOBAL", "CODE_POSTAL_GLOBAL", "DATE_NAISSANCE_GLOBAL", "EMAIL_GLOBAL", "RPPS_GLOBAL", "EPISODE_GLOBAL", "VILLE_GLOBAL" ], "impact": "19.2% des FP totaux", "solution": "Améliorer la logique de propagation globale ou désactiver pour certains types" }, { "priority": "HAUTE", "category": "Extraction de noms", "description": "3846 faux positifs de type NOM_EXTRACTED", "types": [ "NOM_EXTRACTED" ], "impact": "77.7% des FP totaux", "solution": "Améliorer les stopwords médicaux et la détection contextuelle" }, { "priority": "MOYENNE", "category": "Précision faible", "description": "10 types avec précision < 50%", "types": [ "NOM_EXTRACTED", "NOM_GLOBAL", "ETAB_GLOBAL", "TEL_GLOBAL", "ADRESSE_GLOBAL", "CODE_POSTAL_GLOBAL", "DATE_NAISSANCE_GLOBAL", "EMAIL_GLOBAL", "EPISODE", "VILLE" ], "impact": "Affecte 4897 FP", "solution": "Améliorer les regex et la détection contextuelle pour ces types" } ], "improvements": [ { "priority": 2, "title": "Enrichir les stopwords médicaux", "impact": "Réduction de ~3846 FP NOM_EXTRACTED", "effort": "Faible", "gain_precision": "+32.2 points", "tasks": [ "Extraire les termes médicaux des documents annotés", "Identifier les faux positifs récurrents", "Ajouter à _MEDICAL_STOP_WORDS_SET" ] }, { "priority": 4, "title": "Implémenter la détection contextuelle", "impact": "Réduction de ~126 FP", "effort": "Élevé", "gain_precision": "+0.4 points", "tasks": [ "Créer detectors/contextual.py", "Implémenter la détection avec contexte fort/faible", "Filtrer via stopwords médicaux", "Intégrer dans le pipeline hybride" ] } ], "false_positives_by_type": { "NOM_EXTRACTED": 3846, 
"NOM_GLOBAL": 670, "EPISODE": 106, "TEL_GLOBAL": 77, "ADRESSE_GLOBAL": 55, "CODE_POSTAL_GLOBAL": 39, "ETAB_GLOBAL": 36, "EMAIL_GLOBAL": 28, "DATE_NAISSANCE_GLOBAL": 20, "VILLE": 20, "ADRESSE": 10, "CODE_POSTAL": 10, "VILLE_GLOBAL": 10, "EPISODE_GLOBAL": 9, "TEL": 8, "RPPS_GLOBAL": 7 }, "low_precision_types": [ { "type": "NOM_EXTRACTED", "precision": 0.0, "fp": 3846 }, { "type": "NOM_GLOBAL", "precision": 0.0, "fp": 670 }, { "type": "ETAB_GLOBAL", "precision": 0.0, "fp": 36 }, { "type": "TEL_GLOBAL", "precision": 0.0, "fp": 77 }, { "type": "ADRESSE_GLOBAL", "precision": 0.0, "fp": 55 }, { "type": "CODE_POSTAL_GLOBAL", "precision": 0.0, "fp": 39 }, { "type": "DATE_NAISSANCE_GLOBAL", "precision": 0.0, "fp": 20 }, { "type": "EMAIL_GLOBAL", "precision": 0.0, "fp": 28 }, { "type": "EPISODE", "precision": 0.1452, "fp": 106 }, { "type": "VILLE", "precision": 0.2, "fp": 20 } ] }