feat: Analyse propagation globale - 100% des *_GLOBAL et NOM_EXTRACTED sont des FP

This commit is contained in:
2026-03-02 11:01:14 +01:00
parent 4eba826ca5
commit dfa45041d7
6 changed files with 876 additions and 3 deletions

View File

@@ -0,0 +1,164 @@
{
"analysis_date": "2026-03-02",
"global_types_stats": [
{
"type": "NOM_GLOBAL",
"base_type": "NOM",
"tp": 0,
"fp": 670,
"fn": 0,
"precision": 0.0,
"base_tp": 506,
"base_fp": 0,
"base_precision": 1.0
},
{
"type": "TEL_GLOBAL",
"base_type": "TEL",
"tp": 0,
"fp": 77,
"fn": 0,
"precision": 0.0,
"base_tp": 193,
"base_fp": 8,
"base_precision": 0.9602
},
{
"type": "ADRESSE_GLOBAL",
"base_type": "ADRESSE",
"tp": 0,
"fp": 55,
"fn": 0,
"precision": 0.0,
"base_tp": 72,
"base_fp": 10,
"base_precision": 0.878
},
{
"type": "CODE_POSTAL_GLOBAL",
"base_type": "CODE_POSTAL",
"tp": 0,
"fp": 39,
"fn": 0,
"precision": 0.0,
"base_tp": 50,
"base_fp": 10,
"base_precision": 0.8333
},
{
"type": "ETAB_GLOBAL",
"base_type": "ETAB",
"tp": 0,
"fp": 36,
"fn": 0,
"precision": 0.0,
"base_tp": 0,
"base_fp": 0,
"base_precision": 0.0
},
{
"type": "EMAIL_GLOBAL",
"base_type": "EMAIL",
"tp": 0,
"fp": 28,
"fn": 0,
"precision": 0.0,
"base_tp": 62,
"base_fp": 0,
"base_precision": 1.0
},
{
"type": "DATE_NAISSANCE_GLOBAL",
"base_type": "DATE_NAISSANCE",
"tp": 0,
"fp": 20,
"fn": 0,
"precision": 0.0,
"base_tp": 114,
"base_fp": 0,
"base_precision": 1.0
},
{
"type": "VILLE_GLOBAL",
"base_type": "VILLE",
"tp": 0,
"fp": 10,
"fn": 0,
"precision": 0.0,
"base_tp": 5,
"base_fp": 20,
"base_precision": 0.2
},
{
"type": "EPISODE_GLOBAL",
"base_type": "EPISODE",
"tp": 0,
"fp": 9,
"fn": 0,
"precision": 0.0,
"base_tp": 18,
"base_fp": 106,
"base_precision": 0.1452
},
{
"type": "RPPS_GLOBAL",
"base_type": "RPPS",
"tp": 0,
"fp": 7,
"fn": 0,
"precision": 0.0,
"base_tp": 21,
"base_fp": 0,
"base_precision": 1.0
}
],
"nom_extracted_stats": {
"precision": 0.0,
"recall": 0.0,
"f1_score": 0.0,
"true_positives": 0,
"false_positives": 3846,
"false_negatives": 0
},
"recommendations": [
{
"priority": 1,
"title": "Désactiver les types *_GLOBAL inutiles",
"types": [
"NOM_GLOBAL",
"TEL_GLOBAL",
"ADRESSE_GLOBAL",
"CODE_POSTAL_GLOBAL",
"ETAB_GLOBAL",
"EMAIL_GLOBAL",
"DATE_NAISSANCE_GLOBAL",
"VILLE_GLOBAL",
"EPISODE_GLOBAL",
"RPPS_GLOBAL"
],
"impact": "Réduction de 951 FP",
"gain_precision": "+15.6 points",
"effort": "Faible",
"implementation": "Modifier la fonction de propagation globale pour exclure ces types"
},
{
"priority": 2,
"title": "Désactiver NOM_EXTRACTED",
"types": [
"NOM_EXTRACTED"
],
"impact": "Réduction de 3846 FP",
"gain_precision": "+62.9 points",
"effort": "Faible",
"implementation": "Commenter ou supprimer la logique d'extraction de noms"
}
],
"estimated_gain": {
"current_fp": 4951,
"estimated_fp": 154,
"fp_reduction": 4797,
"current_precision": 0.1897,
"estimated_precision": 0.8827113480578828,
"precision_gain": 0.6930113480578828
}
}

View File

@@ -0,0 +1,283 @@
{
"extraction_date": "2026-03-02",
"total_detections": 3846,
"unique_terms": 316,
"frequent_terms_count": 196,
"medical_terms_count": 7,
"top_50_frequent": [
{
"term": "lucie",
"count": 188,
"is_medical": false
},
{
"term": "schmitt",
"count": 185,
"is_medical": false
},
{
"term": "masse",
"count": 170,
"is_medical": false
},
{
"term": "laurence",
"count": 138,
"is_medical": false
},
{
"term": "belleau",
"count": 135,
"is_medical": false
},
{
"term": "céline",
"count": 124,
"is_medical": false
},
{
"term": "justine",
"count": 96,
"is_medical": false
},
{
"term": "burg",
"count": 96,
"is_medical": false
},
{
"term": "schneider",
"count": 90,
"is_medical": false
},
{
"term": "sophie",
"count": 74,
"is_medical": false
},
{
"term": "aguer",
"count": 74,
"is_medical": false
},
{
"term": "kasparian",
"count": 68,
"is_medical": false
},
{
"term": "alexandra",
"count": 64,
"is_medical": false
},
{
"term": "valette",
"count": 63,
"is_medical": false
},
{
"term": "buccale",
"count": 61,
"is_medical": false
},
{
"term": "samuel",
"count": 61,
"is_medical": false
},
{
"term": "bannier",
"count": 60,
"is_medical": false
},
{
"term": "grihault",
"count": 60,
"is_medical": false
},
{
"term": "pedia",
"count": 59,
"is_medical": false
},
{
"term": "françois",
"count": 57,
"is_medical": false
},
{
"term": "quentin",
"count": 57,
"is_medical": false
},
{
"term": "cazenave",
"count": 55,
"is_medical": false
},
{
"term": "bedouet",
"count": 46,
"is_medical": false
},
{
"term": "jean",
"count": 44,
"is_medical": false
},
{
"term": "hurtado",
"count": 44,
"is_medical": false
},
{
"term": "droit",
"count": 43,
"is_medical": true
},
{
"term": "jean-pierre",
"count": 39,
"is_medical": false
},
{
"term": "echelle",
"count": 37,
"is_medical": false
},
{
"term": "glasgow",
"count": 37,
"is_medical": false
},
{
"term": "carriere",
"count": 35,
"is_medical": false
},
{
"term": "juliette",
"count": 35,
"is_medical": false
},
{
"term": "txomin",
"count": 33,
"is_medical": false
},
{
"term": "maternowski",
"count": 31,
"is_medical": false
},
{
"term": "cuillere",
"count": 29,
"is_medical": false
},
{
"term": "cafe",
"count": 29,
"is_medical": false
},
{
"term": "vomissements",
"count": 26,
"is_medical": false
},
{
"term": "gournay",
"count": 26,
"is_medical": false
},
{
"term": "eva",
"count": 25,
"is_medical": false
},
{
"term": "enf",
"count": 24,
"is_medical": false
},
{
"term": "marie-line",
"count": 24,
"is_medical": false
},
{
"term": "picamilh",
"count": 23,
"is_medical": false
},
{
"term": "eneko",
"count": 23,
"is_medical": false
},
{
"term": "bronswick",
"count": 22,
"is_medical": false
},
{
"term": "larrouy",
"count": 20,
"is_medical": false
},
{
"term": "elodie",
"count": 20,
"is_medical": false
},
{
"term": "preremplie",
"count": 18,
"is_medical": false
},
{
"term": "infectieuses",
"count": 16,
"is_medical": false
},
{
"term": "petriat",
"count": 16,
"is_medical": false
},
{
"term": "cotyle",
"count": 16,
"is_medical": false
},
{
"term": "sylvie",
"count": 15,
"is_medical": false
}
],
"medical_terms_by_category": {
"Titres/Fonctions": [
"droit",
"droite"
],
"Pathologies": [
"anastomose"
],
"Termes généraux": [
"colique",
"hilaire",
"urologique",
"vasculaire"
]
},
"all_medical_terms": [
"anastomose",
"colique",
"droit",
"droite",
"hilaire",
"urologique",
"vasculaire"
]
}

View File

@@ -0,0 +1,12 @@
# Newly discovered medical stop words, extracted automatically.
# Meant to be added to _MEDICAL_STOP_WORDS_SET in
# anonymizer_core_refactored_onnx.py.
# NOTE(review): "hilaire" is also a French given name/surname; confirm that
# treating it as a stop word is acceptable before merging.
NEW_MEDICAL_STOPWORDS = {
    "anastomose", "colique", "droit", "droite",
    "hilaire", "urologique", "vasculaire",
}