158 lines
3.7 KiB
JSON
158 lines
3.7 KiB
JSON
{
|
|
"analysis_date": "2026-03-02",
|
|
"global_metrics": {
|
|
"precision": 0.1897,
|
|
"recall": 1.0,
|
|
"f1_score": 0.3189,
|
|
"true_positives": 1159,
|
|
"false_positives": 4951,
|
|
"false_negatives": 0
|
|
},
|
|
"problems": [
|
|
{
|
|
"priority": "HAUTE",
|
|
"category": "Propagation globale",
|
|
"description": "951 faux positifs dus aux détections *_GLOBAL",
|
|
"types": [
|
|
"NOM_GLOBAL",
|
|
"ETAB_GLOBAL",
|
|
"TEL_GLOBAL",
|
|
"ADRESSE_GLOBAL",
|
|
"CODE_POSTAL_GLOBAL",
|
|
"DATE_NAISSANCE_GLOBAL",
|
|
"EMAIL_GLOBAL",
|
|
"RPPS_GLOBAL",
|
|
"EPISODE_GLOBAL",
|
|
"VILLE_GLOBAL"
|
|
],
|
|
"impact": "19.2% des FP totaux",
|
|
"solution": "Améliorer la logique de propagation globale ou désactiver pour certains types"
|
|
},
|
|
{
|
|
"priority": "HAUTE",
|
|
"category": "Extraction de noms",
|
|
"description": "3846 faux positifs de type NOM_EXTRACTED",
|
|
"types": [
|
|
"NOM_EXTRACTED"
|
|
],
|
|
"impact": "77.7% des FP totaux",
|
|
"solution": "Améliorer les stopwords médicaux et la détection contextuelle"
|
|
},
|
|
{
|
|
"priority": "MOYENNE",
|
|
"category": "Précision faible",
|
|
"description": "10 types avec précision < 50%",
|
|
"types": [
|
|
"NOM_EXTRACTED",
|
|
"NOM_GLOBAL",
|
|
"ETAB_GLOBAL",
|
|
"TEL_GLOBAL",
|
|
"ADRESSE_GLOBAL",
|
|
"CODE_POSTAL_GLOBAL",
|
|
"DATE_NAISSANCE_GLOBAL",
|
|
"EMAIL_GLOBAL",
|
|
"EPISODE",
|
|
"VILLE"
|
|
],
|
|
"impact": "Affecte 4897 FP",
|
|
"solution": "Améliorer les regex et la détection contextuelle pour ces types"
|
|
}
|
|
],
|
|
"improvements": [
|
|
{
|
|
"priority": 2,
|
|
"title": "Enrichir les stopwords médicaux",
|
|
"impact": "Réduction de ~3846 FP NOM_EXTRACTED",
|
|
"effort": "Faible",
|
|
"gain_precision": "+62.9 points",
|
|
"tasks": [
|
|
"Extraire les termes médicaux des documents annotés",
|
|
"Identifier les faux positifs récurrents",
|
|
"Ajouter à _MEDICAL_STOP_WORDS_SET"
|
|
]
|
|
},
|
|
{
|
|
"priority": 4,
|
|
"title": "Implémenter la détection contextuelle",
|
|
"impact": "Réduction de ~126 FP",
|
|
"effort": "Élevé",
|
|
"gain_precision": "+2.1 points",
|
|
"tasks": [
|
|
"Créer detectors/contextual.py",
|
|
"Implémenter la détection avec contexte fort/faible",
|
|
"Filtrer via stopwords médicaux",
|
|
"Intégrer dans le pipeline hybride"
|
|
]
|
|
}
|
|
],
|
|
"false_positives_by_type": {
|
|
"NOM_EXTRACTED": 3846,
|
|
"NOM_GLOBAL": 670,
|
|
"EPISODE": 106,
|
|
"TEL_GLOBAL": 77,
|
|
"ADRESSE_GLOBAL": 55,
|
|
"CODE_POSTAL_GLOBAL": 39,
|
|
"ETAB_GLOBAL": 36,
|
|
"EMAIL_GLOBAL": 28,
|
|
"DATE_NAISSANCE_GLOBAL": 20,
|
|
"VILLE": 20,
|
|
"ADRESSE": 10,
|
|
"CODE_POSTAL": 10,
|
|
"VILLE_GLOBAL": 10,
|
|
"EPISODE_GLOBAL": 9,
|
|
"TEL": 8,
|
|
"RPPS_GLOBAL": 7
|
|
},
|
|
"low_precision_types": [
|
|
{
|
|
"type": "NOM_EXTRACTED",
|
|
"precision": 0.0,
|
|
"fp": 3846
|
|
},
|
|
{
|
|
"type": "NOM_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 670
|
|
},
|
|
{
|
|
"type": "ETAB_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 36
|
|
},
|
|
{
|
|
"type": "TEL_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 77
|
|
},
|
|
{
|
|
"type": "ADRESSE_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 55
|
|
},
|
|
{
|
|
"type": "CODE_POSTAL_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 39
|
|
},
|
|
{
|
|
"type": "DATE_NAISSANCE_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 20
|
|
},
|
|
{
|
|
"type": "EMAIL_GLOBAL",
|
|
"precision": 0.0,
|
|
"fp": 28
|
|
},
|
|
{
|
|
"type": "EPISODE",
|
|
"precision": 0.1452,
|
|
"fp": 106
|
|
},
|
|
{
|
|
"type": "VILLE",
|
|
"precision": 0.2,
|
|
"fp": 20
|
|
}
|
|
]
|
|
} |