feat: Analyse baseline - 77.7% FP dus à NOM_EXTRACTED, 19.2% à propagation globale
This commit is contained in:
158
tests/ground_truth/analysis/baseline_analysis.json
Normal file
158
tests/ground_truth/analysis/baseline_analysis.json
Normal file
@@ -0,0 +1,158 @@
|
||||
{
|
||||
"analysis_date": "2026-03-02",
|
||||
"global_metrics": {
|
||||
"precision": 0.1897,
|
||||
"recall": 1.0,
|
||||
"f1_score": 0.3189,
|
||||
"true_positives": 1159,
|
||||
"false_positives": 4951,
|
||||
"false_negatives": 0
|
||||
},
|
||||
"problems": [
|
||||
{
|
||||
"priority": "HAUTE",
|
||||
"category": "Propagation globale",
|
||||
"description": "951 faux positifs dus aux détections *_GLOBAL",
|
||||
"types": [
|
||||
"NOM_GLOBAL",
|
||||
"ETAB_GLOBAL",
|
||||
"TEL_GLOBAL",
|
||||
"ADRESSE_GLOBAL",
|
||||
"CODE_POSTAL_GLOBAL",
|
||||
"DATE_NAISSANCE_GLOBAL",
|
||||
"EMAIL_GLOBAL",
|
||||
"RPPS_GLOBAL",
|
||||
"EPISODE_GLOBAL",
|
||||
"VILLE_GLOBAL"
|
||||
],
|
||||
"impact": "19.2% des FP totaux",
|
||||
"solution": "Améliorer la logique de propagation globale ou désactiver pour certains types"
|
||||
},
|
||||
{
|
||||
"priority": "HAUTE",
|
||||
"category": "Extraction de noms",
|
||||
"description": "3846 faux positifs de type NOM_EXTRACTED",
|
||||
"types": [
|
||||
"NOM_EXTRACTED"
|
||||
],
|
||||
"impact": "77.7% des FP totaux",
|
||||
"solution": "Améliorer les stopwords médicaux et la détection contextuelle"
|
||||
},
|
||||
{
|
||||
"priority": "MOYENNE",
|
||||
"category": "Précision faible",
|
||||
"description": "10 types avec précision < 50%",
|
||||
"types": [
|
||||
"NOM_EXTRACTED",
|
||||
"NOM_GLOBAL",
|
||||
"ETAB_GLOBAL",
|
||||
"TEL_GLOBAL",
|
||||
"ADRESSE_GLOBAL",
|
||||
"CODE_POSTAL_GLOBAL",
|
||||
"DATE_NAISSANCE_GLOBAL",
|
||||
"EMAIL_GLOBAL",
|
||||
"EPISODE",
|
||||
"VILLE"
|
||||
],
|
||||
"impact": "Affecte 4897 FP",
|
||||
"solution": "Améliorer les regex et la détection contextuelle pour ces types"
|
||||
}
|
||||
],
|
||||
"improvements": [
|
||||
{
|
||||
"priority": 2,
|
||||
"title": "Enrichir les stopwords médicaux",
|
||||
"impact": "Réduction de ~3846 FP NOM_EXTRACTED",
|
||||
"effort": "Faible",
|
||||
"gain_precision": "+32.2 points",
|
||||
"tasks": [
|
||||
"Extraire les termes médicaux des documents annotés",
|
||||
"Identifier les faux positifs récurrents",
|
||||
"Ajouter à _MEDICAL_STOP_WORDS_SET"
|
||||
]
|
||||
},
|
||||
{
|
||||
"priority": 4,
|
||||
"title": "Implémenter la détection contextuelle",
|
||||
"impact": "Réduction de ~126 FP",
|
||||
"effort": "Élevé",
|
||||
"gain_precision": "+0.4 points",
|
||||
"tasks": [
|
||||
"Créer detectors/contextual.py",
|
||||
"Implémenter la détection avec contexte fort/faible",
|
||||
"Filtrer via stopwords médicaux",
|
||||
"Intégrer dans le pipeline hybride"
|
||||
]
|
||||
}
|
||||
],
|
||||
"false_positives_by_type": {
|
||||
"NOM_EXTRACTED": 3846,
|
||||
"NOM_GLOBAL": 670,
|
||||
"EPISODE": 106,
|
||||
"TEL_GLOBAL": 77,
|
||||
"ADRESSE_GLOBAL": 55,
|
||||
"CODE_POSTAL_GLOBAL": 39,
|
||||
"ETAB_GLOBAL": 36,
|
||||
"EMAIL_GLOBAL": 28,
|
||||
"DATE_NAISSANCE_GLOBAL": 20,
|
||||
"VILLE": 20,
|
||||
"ADRESSE": 10,
|
||||
"CODE_POSTAL": 10,
|
||||
"VILLE_GLOBAL": 10,
|
||||
"EPISODE_GLOBAL": 9,
|
||||
"TEL": 8,
|
||||
"RPPS_GLOBAL": 7
|
||||
},
|
||||
"low_precision_types": [
|
||||
{
|
||||
"type": "NOM_EXTRACTED",
|
||||
"precision": 0.0,
|
||||
"fp": 3846
|
||||
},
|
||||
{
|
||||
"type": "NOM_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 670
|
||||
},
|
||||
{
|
||||
"type": "ETAB_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 36
|
||||
},
|
||||
{
|
||||
"type": "TEL_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 77
|
||||
},
|
||||
{
|
||||
"type": "ADRESSE_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 55
|
||||
},
|
||||
{
|
||||
"type": "CODE_POSTAL_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 39
|
||||
},
|
||||
{
|
||||
"type": "DATE_NAISSANCE_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 20
|
||||
},
|
||||
{
|
||||
"type": "EMAIL_GLOBAL",
|
||||
"precision": 0.0,
|
||||
"fp": 28
|
||||
},
|
||||
{
|
||||
"type": "EPISODE",
|
||||
"precision": 0.1452,
|
||||
"fp": 106
|
||||
},
|
||||
{
|
||||
"type": "VILLE",
|
||||
"precision": 0.2,
|
||||
"fp": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user