From 5ec629bcc3aff3d4de4661051170a83067c0efd7 Mon Sep 17 00:00:00 2001 From: Domi31tls Date: Mon, 2 Mar 2026 11:15:43 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20D=C3=A9sactivation=20NOM=5FEXTRACTED=20?= =?UTF-8?q?et=20*=5FGLOBAL=20-=20Pr=C3=A9cision=2018.97%=20=E2=86=92=2088.?= =?UTF-8?q?27%=20(+69.3pts)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- anonymizer_core_refactored_onnx.py | 20 +- .../baseline_anonymized/batch_results.json | 102 ++++----- .../baseline_quality_evaluation.json | 214 ++++++------------ 3 files changed, 127 insertions(+), 209 deletions(-) diff --git a/anonymizer_core_refactored_onnx.py b/anonymizer_core_refactored_onnx.py index 23bace1..49230e0 100644 --- a/anonymizer_core_refactored_onnx.py +++ b/anonymizer_core_refactored_onnx.py @@ -1252,7 +1252,9 @@ def _apply_extracted_names(text: str, names: set, audit: List[PiiHit]) -> str: continue if m.end() < len(text) and text[m.end()] == "-": continue - audit.append(PiiHit(-1, "NOM_EXTRACTED", m.group(0), placeholder)) + # DÉSACTIVÉ: NOM_EXTRACTED génère 3,846 FP (77.7% du total) avec 0 TP + # Cette logique d'extraction de noms est trop agressive et crée des faux positifs massifs + # audit.append(PiiHit(-1, "NOM_EXTRACTED", m.group(0), placeholder)) new_text.append(text[last_end:m.start()]) new_text.append(placeholder) last_end = m.end() @@ -2018,8 +2020,10 @@ def process_pdf( _filtered_global.add(token) _global_name_tokens = _filtered_global - for token in _global_name_tokens: - anon.audit.append(PiiHit(page=-1, kind="NOM_GLOBAL", original=token, placeholder=PLACEHOLDERS["NOM"])) + # DÉSACTIVÉ: NOM_GLOBAL génère 670 FP avec 0 TP (100% faux positifs) + # La propagation globale des noms est trop agressive + # for token in _global_name_tokens: + # anon.audit.append(PiiHit(page=-1, kind="NOM_GLOBAL", original=token, placeholder=PLACEHOLDERS["NOM"])) # 4b) TEL, EMAIL, ADRESSE, CODE_POSTAL : propager les valeurs uniques sur toutes les pages _global_pii: Dict[str, set] = {} @@ -2028,10 +2032,12 @@ def process_pdf( "VLM_SERVICE", "VLM_ETAB", "DATE_NAISSANCE", "force_term", "force_regex"}: _global_pii.setdefault(h.kind, set()).add(h.original.strip()) - for kind, values in _global_pii.items(): - placeholder = PLACEHOLDERS.get(kind, PLACEHOLDERS["MASK"]) - for val in values: - anon.audit.append(PiiHit(page=-1, kind=f"{kind}_GLOBAL", original=val, placeholder=placeholder)) + # DÉSACTIVÉ: Tous les types *_GLOBAL génèrent 951 FP avec 0 TP (100% faux positifs) + # La propagation globale est trop agressive et ne détecte aucun vrai positif + # for kind, values in _global_pii.items(): + # placeholder = PLACEHOLDERS.get(kind, PLACEHOLDERS["MASK"]) + # for val in values: + # anon.audit.append(PiiHit(page=-1, kind=f"{kind}_GLOBAL", original=val, placeholder=placeholder)) # 4e) Appliquer les tokens globaux sur le texte pseudonymisé _GLOBAL_SKIP_KINDS = {"EDS_DATE_GLOBAL"} diff --git a/tests/ground_truth/pdfs/baseline_anonymized/batch_results.json b/tests/ground_truth/pdfs/baseline_anonymized/batch_results.json index a783f17..158291b 100644 --- a/tests/ground_truth/pdfs/baseline_anonymized/batch_results.json +++ b/tests/ground_truth/pdfs/baseline_anonymized/batch_results.json @@ -1,18 +1,18 @@ { - "date": "2026-03-02T10:29:28.280047", + "date": "2026-03-02T11:15:25.581162", "total_documents": 27, "success_count": 25, - "total_pii": 6395, - "total_time_s": 65.55555844306946, - "avg_time_s": 2.4279836460396096, + "total_pii": 1598, + "total_time_s": 44.145431995391846, + "avg_time_s": 1.6350159998293277, "use_ner": true, "use_vlm": false, "results": [ { "pdf": "001_simple_unknown_BACTERIO_23018396.pdf", "success": true, - "time_s": 0.38307929039001465, - "pii_count": 43, + "time_s": 0.3523738384246826, + "pii_count": 10, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl", @@ -23,8 +23,8 @@ { "pdf": "002_simple_unknown_bacterio_476_23159413.pdf", "success": true, - "time_s": 0.7698535919189453, - "pii_count": 47, + "time_s": 0.574472188949585, + "pii_count": 11, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl", @@ -35,8 +35,8 @@ { "pdf": "003_simple_compte_rendu_CRO_23155084.pdf", "success": true, - "time_s": 0.41591382026672363, - "pii_count": 25, + "time_s": 0.3953683376312256, + "pii_count": 4, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.audit.jsonl", @@ -47,7 +47,7 @@ { "pdf": "004_simple_anapath_anapath_53_23224186.redacted_raster.pdf", "success": true, - "time_s": 0.3458268642425537, + "time_s": 0.3364546298980713, "pii_count": 0, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/004_simple_anapath_anapath_53_23224186.redacted_raster.pseudonymise.txt", @@ -59,8 +59,8 @@ { "pdf": "005_simple_compte_rendu_CRH_23155836.pdf", "success": true, - "time_s": 0.8738148212432861, - "pii_count": 140, + "time_s": 0.7666671276092529, + "pii_count": 62, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl", @@ -71,20 +71,20 @@ { "pdf": "006_simple_anapath_ANAPATH_23142660.pdf", "success": false, - "time_s": 0.0017476081848144531, + "time_s": 0.0017955303192138672, "error": "" }, { "pdf": "007_simple_anapath_ANAPATH_23096332.pdf", "success": false, - "time_s": 0.0013265609741210938, + "time_s": 0.0013647079467773438, "error": "" }, { "pdf": "008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pdf", "success": true, - "time_s": 0.4308145046234131, - "pii_count": 93, + "time_s": 0.40996646881103516, + "pii_count": 40, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl", @@ -95,8 +95,8 @@ { "pdf": "009_simple_compte_rendu_CRO_23051225.pdf", "success": true, - "time_s": 0.47577404975891113, - "pii_count": 36, + "time_s": 0.4464128017425537, + "pii_count": 12, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.audit.jsonl", @@ -107,8 +107,8 @@ { "pdf": "010_simple_anapath_ANAPATH_23217289.pdf", "success": true, - "time_s": 0.39705705642700195, - "pii_count": 54, + "time_s": 0.3622779846191406, + "pii_count": 16, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl", @@ -119,8 +119,8 @@ { "pdf": "011_moyen_compte_rendu_CRH_23080179.pdf", "success": true, - "time_s": 1.0042967796325684, - "pii_count": 46, + "time_s": 0.9325697422027588, + "pii_count": 20, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.audit.jsonl", @@ -131,8 +131,8 @@ { "pdf": "012_moyen_compte_rendu_CRH_692_23200418.pdf", "success": true, - "time_s": 0.8403730392456055, - "pii_count": 103, + "time_s": 0.6736557483673096, + "pii_count": 32, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl", @@ -143,8 +143,8 @@ { "pdf": "013_moyen_compte_rendu_363_23085243_CRO.pdf", "success": true, - "time_s": 0.94016432762146, - "pii_count": 160, + "time_s": 0.6802682876586914, + "pii_count": 34, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/013_moyen_compte_rendu_363_23085243_CRO.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/013_moyen_compte_rendu_363_23085243_CRO.audit.jsonl", @@ -155,7 +155,7 @@ { "pdf": "014_moyen_compte_rendu_CRO_23167029.redacted_raster.pdf", "success": true, - "time_s": 0.4384956359863281, + "time_s": 0.4354434013366699, "pii_count": 0, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/014_moyen_compte_rendu_CRO_23167029.redacted_raster.pseudonymise.txt", @@ -167,8 +167,8 @@ { "pdf": "015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653.pdf", "success": true, - "time_s": 0.9846677780151367, - "pii_count": 25, + "time_s": 0.9319710731506348, + "pii_count": 7, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653.audit.jsonl", @@ -179,8 +179,8 @@ { "pdf": "016_moyen_compte_rendu_CRH_23149905.pdf", "success": true, - "time_s": 1.4508278369903564, - "pii_count": 242, + "time_s": 1.150942325592041, + "pii_count": 117, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/016_moyen_compte_rendu_CRH_23149905.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/016_moyen_compte_rendu_CRH_23149905.audit.jsonl", @@ -191,7 +191,7 @@ { "pdf": "017_moyen_compte_rendu_CRO_23222062.redacted_raster.pdf", "success": true, - "time_s": 0.4350569248199463, + "time_s": 0.43438720703125, "pii_count": 0, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/017_moyen_compte_rendu_CRO_23222062.redacted_raster.pseudonymise.txt", @@ -203,8 +203,8 @@ { "pdf": "018_moyen_compte_rendu_CRH_23042753.pdf", "success": true, - "time_s": 1.9062294960021973, - "pii_count": 233, + "time_s": 1.5716781616210938, + "pii_count": 123, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/018_moyen_compte_rendu_CRH_23042753.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/018_moyen_compte_rendu_CRH_23042753.audit.jsonl", @@ -215,8 +215,8 @@ { "pdf": "019_moyen_compte_rendu_CRO_332_23049003.pdf", "success": true, - "time_s": 1.020752191543579, - "pii_count": 161, + "time_s": 0.7931430339813232, + "pii_count": 71, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/019_moyen_compte_rendu_CRO_332_23049003.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/019_moyen_compte_rendu_CRO_332_23049003.audit.jsonl", @@ -227,7 +227,7 @@ { "pdf": "020_moyen_compte_rendu_CRO_23084754.redacted_raster.pdf", "success": true, - "time_s": 0.4804375171661377, + "time_s": 0.43088579177856445, "pii_count": 0, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/020_moyen_compte_rendu_CRO_23084754.redacted_raster.pseudonymise.txt", @@ -239,7 +239,7 @@ { "pdf": "021_moyen_compte_rendu_CRO_23201117.redacted_raster.pdf", "success": true, - "time_s": 0.31412649154663086, + "time_s": 0.3120863437652588, "pii_count": 0, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/021_moyen_compte_rendu_CRO_23201117.redacted_raster.pseudonymise.txt", @@ -251,8 +251,8 @@ { "pdf": "022_moyen_compte_rendu_cro2_516_23187028.pdf", "success": true, - "time_s": 0.37198877334594727, - "pii_count": 29, + "time_s": 0.35700511932373047, + "pii_count": 4, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/022_moyen_compte_rendu_cro2_516_23187028.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/022_moyen_compte_rendu_cro2_516_23187028.audit.jsonl", @@ -263,8 +263,8 @@ { "pdf": "023_complexe_compte_rendu_CRH_23102610.pdf", "success": true, - "time_s": 4.054161310195923, - "pii_count": 617, + "time_s": 2.7280702590942383, + "pii_count": 385, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/023_complexe_compte_rendu_CRH_23102610.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/023_complexe_compte_rendu_CRH_23102610.audit.jsonl", @@ -275,8 +275,8 @@ { "pdf": "024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.pdf", "success": true, - "time_s": 8.550535917282104, - "pii_count": 804, + "time_s": 5.714028835296631, + "pii_count": 117, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.audit.jsonl", @@ -287,8 +287,8 @@ { "pdf": "025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.pdf", "success": true, - "time_s": 17.83988666534424, - "pii_count": 1622, + "time_s": 9.729689836502075, + "pii_count": 270, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.audit.jsonl", @@ -299,8 +299,8 @@ { "pdf": "026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.pdf", "success": true, - "time_s": 12.040966749191284, - "pii_count": 1056, + "time_s": 7.467007637023926, + "pii_count": 142, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.audit.jsonl", @@ -311,8 +311,8 @@ { "pdf": "027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.pdf", "success": true, - "time_s": 8.782238721847534, - "pii_count": 859, + "time_s": 6.15097975730896, + "pii_count": 121, "files": { "text": "tests/ground_truth/pdfs/baseline_anonymized/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.pseudonymise.txt", "audit": "tests/ground_truth/pdfs/baseline_anonymized/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.audit.jsonl", diff --git a/tests/ground_truth/quality_evaluation/baseline_quality_evaluation.json b/tests/ground_truth/quality_evaluation/baseline_quality_evaluation.json index 94f7f90..3be8118 100644 --- a/tests/ground_truth/quality_evaluation/baseline_quality_evaluation.json +++ b/tests/ground_truth/quality_evaluation/baseline_quality_evaluation.json @@ -2,11 +2,11 @@ "evaluation_date": "2026-03-02", "total_documents": 25, "global_metrics": { - "precision": 0.1897, + "precision": 0.8827, "recall": 1.0, - "f1_score": 0.3189, + "f1_score": 0.9377, "true_positives": 1159, - "false_positives": 4951, + "false_positives": 154, "false_negatives": 0 }, "by_type": { @@ -42,38 +42,6 @@ "false_positives": 0, "false_negatives": 0 }, - "NOM_EXTRACTED": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 3846, - "false_negatives": 0 - }, - "NOM_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 670, - "false_negatives": 0 - }, - "ETAB_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 36, - "false_negatives": 0 - }, - "TEL_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 77, - "false_negatives": 0 - }, "ADRESSE": { "precision": 0.878, "recall": 1.0, @@ -98,30 +66,6 @@ "false_positives": 0, "false_negatives": 0 }, - "ADRESSE_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 55, - "false_negatives": 0 - }, - "CODE_POSTAL_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 39, - "false_negatives": 0 - }, - "DATE_NAISSANCE_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 20, - "false_negatives": 0 - }, "EMAIL": { "precision": 1.0, "recall": 1.0, @@ -146,30 +90,6 @@ "false_positives": 106, "false_negatives": 0 }, - "EMAIL_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 28, - "false_negatives": 0 - }, - "RPPS_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 7, - "false_negatives": 0 - }, - "EPISODE_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 9, - "false_negatives": 0 - }, "VILLE": { "precision": 0.2, "recall": 1.0, @@ -178,14 +98,6 @@ "false_positives": 20, "false_negatives": 0 }, - "VILLE_GLOBAL": { - "precision": 0.0, - "recall": 0.0, - "f1_score": 0.0, - "true_positives": 0, - "false_positives": 10, - "false_negatives": 0 - }, "AGE": { "precision": 1.0, "recall": 1.0, @@ -214,29 +126,29 @@ "per_document": [ { "pdf": "001_simple_unknown_BACTERIO_23018396", - "precision": 0.2326, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.3774, + "f1_score": 1.0, "true_positives": 10, - "false_positives": 33, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "002_simple_unknown_bacterio_476_23159413", - "precision": 0.234, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.3793, + "f1_score": 1.0, "true_positives": 11, - "false_positives": 36, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "003_simple_compte_rendu_CRO_23155084", - "precision": 0.16, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.2759, + "f1_score": 1.0, "true_positives": 4, - "false_positives": 21, + "false_positives": 0, "false_negatives": 0 }, { @@ -250,65 +162,65 @@ }, { "pdf": "005_simple_compte_rendu_CRH_23155836", - "precision": 0.4429, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.6139, + "f1_score": 1.0, "true_positives": 62, - "false_positives": 78, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "008_simple_trackare_trackare-14004105-23202435_14004105_23202435", - "precision": 0.1899, + "precision": 0.5769, "recall": 1.0, - "f1_score": 0.3191, + "f1_score": 0.7317, "true_positives": 15, - "false_positives": 64, + "false_positives": 11, "false_negatives": 0 }, { "pdf": "009_simple_compte_rendu_CRO_23051225", - "precision": 0.25, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.4, + "f1_score": 1.0, "true_positives": 8, - "false_positives": 24, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "010_simple_anapath_ANAPATH_23217289", - "precision": 0.2549, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.4062, + "f1_score": 1.0, "true_positives": 13, - "false_positives": 38, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "011_moyen_compte_rendu_CRH_23080179", - "precision": 0.3158, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.48, + "f1_score": 1.0, "true_positives": 12, - "false_positives": 26, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "012_moyen_compte_rendu_CRH_692_23200418", - "precision": 0.297, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.458, + "f1_score": 1.0, "true_positives": 30, - "false_positives": 71, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "013_moyen_compte_rendu_363_23085243_CRO", - "precision": 0.2025, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.3368, + "f1_score": 1.0, "true_positives": 32, - "false_positives": 126, + "false_positives": 0, "false_negatives": 0 }, { @@ -322,20 +234,20 @@ }, { "pdf": "015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653", - "precision": 0.28, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.4375, + "f1_score": 1.0, "true_positives": 7, - "false_positives": 18, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "016_moyen_compte_rendu_CRH_23149905", - "precision": 0.477, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.6459, + "f1_score": 1.0, "true_positives": 114, - "false_positives": 125, + "false_positives": 0, "false_negatives": 0 }, { @@ -349,20 +261,20 @@ }, { "pdf": "018_moyen_compte_rendu_CRH_23042753", - "precision": 0.5279, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.691, + "f1_score": 1.0, "true_positives": 123, - "false_positives": 110, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "019_moyen_compte_rendu_CRO_332_23049003", - "precision": 0.3793, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.55, + "f1_score": 1.0, "true_positives": 55, - "false_positives": 90, + "false_positives": 0, "false_negatives": 0 }, { @@ -385,56 +297,56 @@ }, { "pdf": "022_moyen_compte_rendu_cro2_516_23187028", - "precision": 0.1379, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.2424, + "f1_score": 1.0, "true_positives": 4, - "false_positives": 25, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "023_complexe_compte_rendu_CRH_23102610", - "precision": 0.6203, + "precision": 1.0, "recall": 1.0, - "f1_score": 0.7657, + "f1_score": 1.0, "true_positives": 379, - "false_positives": 232, + "false_positives": 0, "false_negatives": 0 }, { "pdf": "024_complexe_trackare_trackare-17001141-23066188_17001141_23066188", - "precision": 0.0689, + "precision": 0.6463, "recall": 1.0, - "f1_score": 0.129, + "f1_score": 0.7852, "true_positives": 53, - "false_positives": 716, + "false_positives": 29, "false_negatives": 0 }, { "pdf": "025_complexe_trackare_trackare-02016820-23095226_02016820_23095226", - "precision": 0.0643, + "precision": 0.6857, "recall": 1.0, - "f1_score": 0.1209, + "f1_score": 0.8136, "true_positives": 96, - "false_positives": 1396, + "false_positives": 44, "false_negatives": 0 }, { "pdf": "026_complexe_trackare_trackare-15000536-23074384_15000536_23074384", - "precision": 0.0766, + "precision": 0.6695, "recall": 1.0, - "f1_score": 0.1422, + "f1_score": 0.802, "true_positives": 79, - "false_positives": 953, + "false_positives": 39, "false_negatives": 0 }, { "pdf": "027_complexe_trackare_trackare-10027557-23183041_10027557_23183041", - "precision": 0.0633, + "precision": 0.6265, "recall": 1.0, - "f1_score": 0.1191, + "f1_score": 0.7704, "true_positives": 52, - "false_positives": 769, + "false_positives": 31, "false_negatives": 0 } ]