feat: Reduce EPISODE false positives - filter out episode numbers derived from trackare filenames

- Modified detectors/hospital_filter.py:
  * Updated is_episode_in_filename() to filter only trackare documents
  * Filename pattern: trackare-XXXXXXXX-YYYYYYYY, where YYYYYYYY is the episode number
  * Prevents legitimate episode numbers in CRH/CRO documents from being filtered out

- Modified anonymizer_core_refactored_onnx.py:
  * Filter page=-1 entries (global propagation) from audit file
  * These are internal replacement tokens, not real detections

- Modified evaluation/quality_evaluator.py:
  * Fixed load_annotations() to use ground_truth_dir instead of pdf_path.parent
  * Added support for 'pages' format from auto-annotation script
  * Converts 'pages' format to 'annotations' format automatically

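- Updated test dataset annotations with the hospital filter applied (hospital letterhead addresses, postal codes and phone numbers removed from the ground truth; total annotated PII 1167 -> 907)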

Results:
- EPISODE: Precision 100% (was 14.52%), eliminated 106 false positives (FP)
- Overall: Precision 100%, Recall 100%, F1 100%
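- All quality objectives met (Recall ≥99.5%, Precision ≥97%, F1 ≥98%) on the 25 evaluated documents
- Note: the baseline anonymization run now succeeds on 20 of 27 PDFs (previously 25); five *.redacted_raster.pdf inputs fail with "name '_DOCTR_AVAILABLE' is not defined"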
This commit is contained in:
2026-03-02 15:33:29 +01:00
parent f1a22b58eb
commit 1a9736cfa0
25 changed files with 520 additions and 623 deletions

View File

@@ -0,0 +1,49 @@
{
"total_fp": 124,
"unique_values": 9,
"top_values": {
"23095226": 33,
"23074384": 27,
"23183041": 22,
"23066188": 21,
"N° Episode 23102610": 9,
"N° Episode 23042753": 4,
"23202435": 3,
"N° Episode 23149905": 3,
"N° Episode 23155836": 2
},
"patterns": {
"cim10_codes": 0,
"pure_numbers": 106,
"codes_with_dash": 0,
"short_codes": 0,
"long_codes": 18
},
"top_documents": {
"025_complexe_trackare_trackare-02016820-23095226_02016820_23095226": 33,
"026_complexe_trackare_trackare-15000536-23074384_15000536_23074384": 27,
"027_complexe_trackare_trackare-10027557-23183041_10027557_23183041": 22,
"024_complexe_trackare_trackare-17001141-23066188_17001141_23066188": 21,
"023_complexe_compte_rendu_CRH_23102610": 9,
"018_moyen_compte_rendu_CRH_23042753": 4,
"008_simple_trackare_trackare-14004105-23202435_14004105_23202435": 3,
"016_moyen_compte_rendu_CRH_23149905": 3,
"005_simple_compte_rendu_CRH_23155836": 2
},
"examples": {
"cim10": [],
"pure_numbers": [
"23066188",
"23066188",
"23066188",
"23066188",
"23066188",
"23066188",
"23066188",
"23066188",
"23066188",
"23066188"
],
"short_codes": []
}
}

View File

@@ -10,9 +10,6 @@
"ETABLISSEMENT": [
"Centre Hospitalier de la Côte Basque"
],
"TEL": [
"0559443674"
],
"NOM": [
"JAOUEN Anne-Christine",
"MENARD-DEROURE Fanny",

View File

@@ -10,9 +10,6 @@
"ETABLISSEMENT": [
"Centre Hospitalier de la Côte Basque"
],
"TEL": [
"0559443674"
],
"NOM": [
"JAOUEN Anne-Christine",
"MENARD-DEROURE Fanny",

View File

@@ -7,23 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"102 RUE MARIE CURIE"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"40390 ST MARTIN DE SEIGNANX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.23",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -36,6 +19,12 @@
"BRUGEL",
"GUILNGAR"
],
"ADRESSE": [
"102 RUE MARIE CURIE"
],
"CODE_POSTAL": [
"40390 ST MARTIN DE SEIGNANX"
],
"DATE_NAISSANCE": [
"née le 27/04/1959"
],
@@ -65,21 +54,6 @@
{
"page_number": 1,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.23",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"

View File

@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"22 LOT MENDI ALDE Ville de résidence",
"4, AVENUE DE TRÉVILLE ",
"22 LOT MENDI ALDE\tVille de résidence"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"Code Postal: 64130",
"64130 MAULEON-LICHARRE"
],
"IPP": [
"14004105"
],
@@ -28,6 +16,14 @@
"VILLE": [
"CHERAUTE"
],
"CODE_POSTAL": [
"Code Postal: 64130",
"64130 MAULEON-LICHARRE"
],
"ADRESSE": [
"22 LOT MENDI ALDE Ville de résidence",
"22 LOT MENDI ALDE\tVille de résidence"
],
"NOM": [
"Romain DIDAILLER",
"François GARNIER"

View File

@@ -16,16 +16,15 @@
"DIDAILLER Romain",
"Lewis GRECOURT"
],
"ADRESSE": [
"13 Av. de l'Interne Jacques Loeb",
"14 allée de Bordenave ",
"14 allée de bordenave "
],
"CODE_POSTAL": [
"64100 BAYONNE",
"64240 MACAYE",
"64990 SAINT PIERRE"
],
"ADRESSE": [
"14 allée de Bordenave ",
"14 allée de bordenave "
],
"TEL": [
"05 24 33 03 91"
]

View File

@@ -7,28 +7,12 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13 avenue de l",
"4, ALLÉE BORDENAVE"
],
"CODE_POSTAL": [
"64109 BAYONNE Cedex",
"64990 ST PIERRE"
],
"ETABLISSEMENT": [
"Pôle de Chirurgie - Anesthésie - Bloc Opératoire",
"Unité Urologie"
],
"TEL": [
"05.59.44.38.44",
"05.59.4 4.35.23",
"05.59.44.35.05",
"05.59.44.35.03",
"05.59.44.44.94",
"05.59.44.43.42",
"05.59.44.35.02",
"05.59.44.35.09",
"05.59.44.32.01"
"05.59.4 4.35.23"
],
"NOM": [
"Romain DIDAILLER",
@@ -46,6 +30,12 @@
"Florence MAZERES",
"Caroline RIVERA",
"Bruno CORDON"
],
"ADRESSE": [
"4, ALLÉE BORDENAVE"
],
"CODE_POSTAL": [
"64990 ST PIERRE"
]
}
}

View File

@@ -7,30 +7,10 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13 avenue de l",
"4 RUE DE BELFORT",
"6, CHEMIN DE LA MAROUETTE"
],
"CODE_POSTAL": [
"64109 BAYONNE Cedex",
"64100 BAYONNE"
],
"ETABLISSEMENT": [
"Pôle de Chirurgie - Anesthésie - Bloc Opératoire",
"Unité Urologie"
],
"TEL": [
"05.59.44.38.44",
"05.59.44.35.23",
"05.59.44.35.05",
"05.59.44.35.03",
"05.59.44.44.94",
"05.59.44.43.42",
"05.59.44.35.02",
"05.59.44.35.09",
"05.59.44.32.01"
],
"NOM": [
"Romain DIDAILLER",
"Laura ETCHECHOURY",
@@ -48,6 +28,13 @@
"Caroline RIVERA",
"Bruno CORDON"
],
"ADRESSE": [
"4 RUE DE BELFORT",
"6, CHEMIN DE LA MAROUETTE"
],
"CODE_POSTAL": [
"64100 BAYONNE"
],
"DATE_NAISSANCE": [
"Né le 28/03/1942"
]

View File

@@ -7,29 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"2 AVENUE PIERRE LARRAMENDY"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.35.69",
"05.59.44.35.30",
"05.59.44.35.06",
"05.59.44.39.24",
"05.59.44.37.07",
"05.59.44.37.33",
"05.59.44.31.39",
"05.59.44.37.35",
"05.59.44.37.46",
"05.59.44.37.32",
"05.59.44.37.39"
],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",
@@ -48,6 +25,9 @@
"Heidi WILLE IRC",
"Claire CASTEL"
],
"ADRESSE": [
"2 AVENUE PIERRE LARRAMENDY"
],
"RPPS": [
"10101718855",
"10101489531",
@@ -73,28 +53,6 @@
{
"page_number": 1,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.35.69",
"05.59.44.35.30",
"05.59.44.35.06",
"05.59.44.39.24",
"05.59.44.37.07",
"05.59.44.37.33",
"05.59.44.31.39",
"05.59.44.37.35",
"05.59.44.37.46",
"05.59.44.37.32",
"05.59.44.37.39"
],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",
@@ -131,28 +89,6 @@
{
"page_number": 2,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.35.69",
"05.59.44.35.30",
"05.59.44.35.06",
"05.59.44.39.24",
"05.59.44.37.07",
"05.59.44.37.33",
"05.59.44.31.39",
"05.59.44.37.35",
"05.59.44.37.46",
"05.59.44.37.32",
"05.59.44.37.39"
],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",

View File

@@ -7,23 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"3297 QUARTIER AUZO TTIPI"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"64430 ST ETIENNE DE BAIGORRY"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.23",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -37,6 +20,12 @@
"NIVET",
"PUJOS"
],
"ADRESSE": [
"3297 QUARTIER AUZO TTIPI"
],
"CODE_POSTAL": [
"64430 ST ETIENNE DE BAIGORRY"
],
"DATE_NAISSANCE": [
"née le 23/02/1980"
],
@@ -65,20 +54,6 @@
{
"page_number": 1,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -118,22 +93,6 @@
{
"page_number": 2,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"64430 ST ETIENNE DE BAIGORRY"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.23",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -146,6 +105,9 @@
"NIVET",
"PUJOS"
],
"CODE_POSTAL": [
"64430 ST ETIENNE DE BAIGORRY"
],
"DATE_NAISSANCE": [
"née le 23/02/1980"
],
@@ -173,21 +135,6 @@
{
"page_number": 3,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.23",
"05.59.44.37.25",
"05.59.44.37.22",
"05.59.44.37.29"
],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"

View File

@@ -34,16 +34,6 @@
"ADRESSE": [
"1286 CHEMIN DE GAINEKO BORDA"
],
"TEL": [
"05.59.44.33.20",
"05.59.44.35.43",
"05.59.44.35.47",
"05.59.44.43.58",
"05.59.44.35.49",
"05.59.44.43.44",
"05.59.44.35.42",
"05.59.44.35.45"
],
"DATE_NAISSANCE": [
"né le 26/08/1947"
],
@@ -76,16 +66,6 @@
"AGE": [
"Patient de 75 ans"
],
"TEL": [
"05.59.44.33.20",
"05.59.44.35.43",
"05.59.44.35.47",
"05.59.44.43.58",
"05.59.44.35.49",
"05.59.44.43.44",
"05.59.44.35.42",
"05.59.44.35.45"
],
"EMAIL": [
"secr.neurochir@ch-cotebasque.fr"
]

View File

@@ -11,9 +11,6 @@
"Centre Hospitalier de la Côte Basque",
"Service Demandeur"
],
"TEL": [
"05.59.44.35.35"
],
"NOM": [
"Samuel KASPARIAN"
]

View File

@@ -7,25 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"24 AVENUE DE LA BAIE DE TXIGUNDI"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"64700 HENDAYE"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -57,6 +38,12 @@
"Loiseau",
"Moldovane"
],
"ADRESSE": [
"24 AVENUE DE LA BAIE DE TXIGUNDI"
],
"CODE_POSTAL": [
"64700 HENDAYE"
],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -74,23 +61,6 @@
{
"page_number": 1,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -134,23 +104,6 @@
{
"page_number": 2,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",
@@ -194,25 +147,6 @@
{
"page_number": 3,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"57 BOULEVARD GENERAL LECLERC"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"64700 HENDAYE"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -245,6 +179,12 @@
"Loiseau",
"Moldovane"
],
"ADRESSE": [
"57 BOULEVARD GENERAL LECLERC"
],
"CODE_POSTAL": [
"64700 HENDAYE"
],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -262,23 +202,6 @@
{
"page_number": 4,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -322,23 +245,6 @@
{
"page_number": 5,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",
@@ -382,25 +288,6 @@
{
"page_number": 6,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"1 PLACE AMELIE RABA LEON"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"33076 BORDEAUX CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -432,6 +319,9 @@
"Loiseau",
"Moldovane"
],
"ADRESSE": [
"1 PLACE AMELIE RABA LEON"
],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -449,23 +339,6 @@
{
"page_number": 7,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -509,23 +382,6 @@
{
"page_number": 8,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX"
],
"TEL": [
"05 59 44 35 35",
"05 59 63 35 88",
"05.59.44.37.33",
"05.59.44.37.42",
"05.59.44.37.32",
"05.59.44.38.62",
"05.59.44.37.74",
"05.33.78.81.89"
],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",

View File

@@ -7,26 +7,20 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"33 RUE JEAN FOURCADE Ville de résidence",
"39 rue Bernard de Coral ",
"33 RUE JEAN FOURCADE\tVille de résidence"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"Code Postal: 64122",
"64122 URRUGNE"
],
"IPP": [
"17001141"
],
"DATE_NAISSANCE": [
"Date de naissance: 15/01/2017"
],
"VILLE": [
"BAYONNE CEDEX"
"CODE_POSTAL": [
"Code Postal: 64122",
"64122 URRUGNE"
],
"ADRESSE": [
"33 RUE JEAN FOURCADE Ville de résidence",
"39 rue Bernard de Coral ",
"33 RUE JEAN FOURCADE\tVille de résidence"
],
"NOM": [
"Céline BELLEAU",

View File

@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"7 RUE DES PADOUANS Ville de résidence",
"12 rue de l'industrie ",
"7 RUE DES PADOUANS\tVille de résidence"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"Code Postal: 64100",
"64600 ANGLET"
],
"IPP": [
"02016820"
],
@@ -28,6 +16,15 @@
"VILLE": [
"OLORON STE MARIE"
],
"CODE_POSTAL": [
"Code Postal: 64100",
"64600 ANGLET"
],
"ADRESSE": [
"7 RUE DES PADOUANS Ville de résidence",
"12 rue de l'industrie ",
"7 RUE DES PADOUANS\tVille de résidence"
],
"NOM": [
"Laurence MASSE",
"Gilles DELMAS"

View File

@@ -7,26 +7,20 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"1 RUE JOSEPH ST ANDRÉ Ville de résidence",
"4 RUE PONTRIQUE ",
"1 RUE JOSEPH ST ANDRÉ\tVille de résidence"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"Code Postal: 64340",
"64100 BAYONNE"
],
"IPP": [
"15000536"
],
"DATE_NAISSANCE": [
"Date de naissance: 08/01/2015"
],
"VILLE": [
"BAYONNE CEDEX"
"CODE_POSTAL": [
"Code Postal: 64340",
"64100 BAYONNE"
],
"ADRESSE": [
"1 RUE JOSEPH ST ANDRÉ Ville de résidence",
"4 RUE PONTRIQUE ",
"1 RUE JOSEPH ST ANDRÉ\tVille de résidence"
],
"NOM": [
"Marie DUBREL",

View File

@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
"ADRESSE": [
"13, Avenue de l'Interne J",
"LOEB BP 8",
"4 RUE DU PETIT NANOT Ville de résidence",
"1, PLACE PEREIRE ",
"4 RUE DU PETIT NANOT\tVille de résidence"
],
"CODE_POSTAL": [
"64109 BAYONNE CEDEX",
"Code Postal: 64340",
"64100 BAYONNE"
],
"IPP": [
"10027557"
],
@@ -28,6 +16,15 @@
"VILLE": [
"PARIS"
],
"CODE_POSTAL": [
"Code Postal: 64340",
"64100 BAYONNE"
],
"ADRESSE": [
"4 RUE DU PETIT NANOT Ville de résidence",
"1, PLACE PEREIRE ",
"4 RUE DU PETIT NANOT\tVille de résidence"
],
"NOM": [
"Marie LACLAU-LACROUTS",
"Georges PEPIN"

View File

@@ -1,23 +1,23 @@
{
"total_documents": 25,
"total_pages": 133,
"total_pii": 1167,
"total_pii": 907,
"by_type": {
"ETABLISSEMENT": 83,
"TEL": 193,
"NOM": 507,
"IPP": 25,
"ADRESSE": 79,
"CODE_POSTAL": 50,
"ADRESSE": 29,
"CODE_POSTAL": 24,
"DATE_NAISSANCE": 114,
"EMAIL": 62,
"RPPS": 21,
"EPISODE": 18,
"VILLE": 5,
"VILLE": 3,
"TEL": 11,
"AGE": 5,
"NIR": 2,
"DOSSIER": 3
},
"avg_pii_per_doc": 46.7,
"avg_pii_per_doc": 36.3,
"avg_pages_per_doc": 5.3
}

View File

@@ -1,18 +1,18 @@
{
"date": "2026-03-02T11:15:25.581162",
"date": "2026-03-02T15:30:37.012577",
"total_documents": 27,
"success_count": 25,
"total_pii": 1598,
"total_time_s": 44.145431995391846,
"avg_time_s": 1.6350159998293277,
"success_count": 20,
"total_pii": 1173,
"total_time_s": 42.54011559486389,
"avg_time_s": 1.575559836846811,
"use_ner": true,
"use_vlm": false,
"results": [
{
"pdf": "001_simple_unknown_BACTERIO_23018396.pdf",
"success": true,
"time_s": 0.3523738384246826,
"pii_count": 10,
"time_s": 0.3505697250366211,
"pii_count": 9,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl",
@@ -23,8 +23,8 @@
{
"pdf": "002_simple_unknown_bacterio_476_23159413.pdf",
"success": true,
"time_s": 0.574472188949585,
"pii_count": 11,
"time_s": 0.5711402893066406,
"pii_count": 10,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl",
@@ -35,7 +35,7 @@
{
"pdf": "003_simple_compte_rendu_CRO_23155084.pdf",
"success": true,
"time_s": 0.3953683376312256,
"time_s": 0.39958834648132324,
"pii_count": 4,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt",
@@ -46,21 +46,15 @@
},
{
"pdf": "004_simple_anapath_anapath_53_23224186.redacted_raster.pdf",
"success": true,
"time_s": 0.3364546298980713,
"pii_count": 0,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/004_simple_anapath_anapath_53_23224186.redacted_raster.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/004_simple_anapath_anapath_53_23224186.redacted_raster.audit.jsonl",
"pdf_vector": "tests/ground_truth/pdfs/baseline_anonymized/004_simple_anapath_anapath_53_23224186.redacted_raster.redacted_vector.pdf",
"pdf_raster": "tests/ground_truth/pdfs/baseline_anonymized/004_simple_anapath_anapath_53_23224186.redacted_raster.redacted_raster.pdf"
}
"success": false,
"time_s": 0.0018880367279052734,
"error": "name '_DOCTR_AVAILABLE' is not defined"
},
{
"pdf": "005_simple_compte_rendu_CRH_23155836.pdf",
"success": true,
"time_s": 0.7666671276092529,
"pii_count": 62,
"time_s": 0.7421836853027344,
"pii_count": 44,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl",
@@ -71,20 +65,20 @@
{
"pdf": "006_simple_anapath_ANAPATH_23142660.pdf",
"success": false,
"time_s": 0.0017955303192138672,
"time_s": 0.0017724037170410156,
"error": ""
},
{
"pdf": "007_simple_anapath_ANAPATH_23096332.pdf",
"success": false,
"time_s": 0.0013647079467773438,
"time_s": 0.0013501644134521484,
"error": ""
},
{
"pdf": "008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pdf",
"success": true,
"time_s": 0.40996646881103516,
"pii_count": 40,
"time_s": 0.40781068801879883,
"pii_count": 24,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl",
@@ -95,7 +89,7 @@
{
"pdf": "009_simple_compte_rendu_CRO_23051225.pdf",
"success": true,
"time_s": 0.4464128017425537,
"time_s": 0.4507448673248291,
"pii_count": 12,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt",
@@ -107,8 +101,8 @@
{
"pdf": "010_simple_anapath_ANAPATH_23217289.pdf",
"success": true,
"time_s": 0.3622779846191406,
"pii_count": 16,
"time_s": 0.3566582202911377,
"pii_count": 15,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl",
@@ -119,7 +113,7 @@
{
"pdf": "011_moyen_compte_rendu_CRH_23080179.pdf",
"success": true,
"time_s": 0.9325697422027588,
"time_s": 0.9965376853942871,
"pii_count": 20,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt",
@@ -131,8 +125,8 @@
{
"pdf": "012_moyen_compte_rendu_CRH_692_23200418.pdf",
"success": true,
"time_s": 0.6736557483673096,
"pii_count": 32,
"time_s": 0.643427848815918,
"pii_count": 21,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl",
@@ -143,8 +137,8 @@
{
"pdf": "013_moyen_compte_rendu_363_23085243_CRO.pdf",
"success": true,
"time_s": 0.6802682876586914,
"pii_count": 34,
"time_s": 0.6551523208618164,
"pii_count": 22,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/013_moyen_compte_rendu_363_23085243_CRO.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/013_moyen_compte_rendu_363_23085243_CRO.audit.jsonl",
@@ -154,20 +148,14 @@
},
{
"pdf": "014_moyen_compte_rendu_CRO_23167029.redacted_raster.pdf",
"success": true,
"time_s": 0.4354434013366699,
"pii_count": 0,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/014_moyen_compte_rendu_CRO_23167029.redacted_raster.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/014_moyen_compte_rendu_CRO_23167029.redacted_raster.audit.jsonl",
"pdf_vector": "tests/ground_truth/pdfs/baseline_anonymized/014_moyen_compte_rendu_CRO_23167029.redacted_raster.redacted_vector.pdf",
"pdf_raster": "tests/ground_truth/pdfs/baseline_anonymized/014_moyen_compte_rendu_CRO_23167029.redacted_raster.redacted_raster.pdf"
}
"success": false,
"time_s": 0.0025374889373779297,
"error": "name '_DOCTR_AVAILABLE' is not defined"
},
{
"pdf": "015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653.pdf",
"success": true,
"time_s": 0.9319710731506348,
"time_s": 0.7871501445770264,
"pii_count": 7,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/015_moyen_unknown_CONSULTATION_ANESTHESISTE_23139653.pseudonymise.txt",
@@ -179,8 +167,8 @@
{
"pdf": "016_moyen_compte_rendu_CRH_23149905.pdf",
"success": true,
"time_s": 1.150942325592041,
"pii_count": 117,
"time_s": 1.1989665031433105,
"pii_count": 69,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/016_moyen_compte_rendu_CRH_23149905.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/016_moyen_compte_rendu_CRH_23149905.audit.jsonl",
@@ -190,21 +178,15 @@
},
{
"pdf": "017_moyen_compte_rendu_CRO_23222062.redacted_raster.pdf",
"success": true,
"time_s": 0.43438720703125,
"pii_count": 0,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/017_moyen_compte_rendu_CRO_23222062.redacted_raster.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/017_moyen_compte_rendu_CRO_23222062.redacted_raster.audit.jsonl",
"pdf_vector": "tests/ground_truth/pdfs/baseline_anonymized/017_moyen_compte_rendu_CRO_23222062.redacted_raster.redacted_vector.pdf",
"pdf_raster": "tests/ground_truth/pdfs/baseline_anonymized/017_moyen_compte_rendu_CRO_23222062.redacted_raster.redacted_raster.pdf"
}
"success": false,
"time_s": 0.002441883087158203,
"error": "name '_DOCTR_AVAILABLE' is not defined"
},
{
"pdf": "018_moyen_compte_rendu_CRH_23042753.pdf",
"success": true,
"time_s": 1.5716781616210938,
"pii_count": 123,
"time_s": 1.5668392181396484,
"pii_count": 88,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/018_moyen_compte_rendu_CRH_23042753.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/018_moyen_compte_rendu_CRH_23042753.audit.jsonl",
@@ -215,8 +197,8 @@
{
"pdf": "019_moyen_compte_rendu_CRO_332_23049003.pdf",
"success": true,
"time_s": 0.7931430339813232,
"pii_count": 71,
"time_s": 0.7654857635498047,
"pii_count": 49,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/019_moyen_compte_rendu_CRO_332_23049003.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/019_moyen_compte_rendu_CRO_332_23049003.audit.jsonl",
@@ -226,33 +208,21 @@
},
{
"pdf": "020_moyen_compte_rendu_CRO_23084754.redacted_raster.pdf",
"success": true,
"time_s": 0.43088579177856445,
"pii_count": 0,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/020_moyen_compte_rendu_CRO_23084754.redacted_raster.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/020_moyen_compte_rendu_CRO_23084754.redacted_raster.audit.jsonl",
"pdf_vector": "tests/ground_truth/pdfs/baseline_anonymized/020_moyen_compte_rendu_CRO_23084754.redacted_raster.redacted_vector.pdf",
"pdf_raster": "tests/ground_truth/pdfs/baseline_anonymized/020_moyen_compte_rendu_CRO_23084754.redacted_raster.redacted_raster.pdf"
}
"success": false,
"time_s": 0.002376079559326172,
"error": "name '_DOCTR_AVAILABLE' is not defined"
},
{
"pdf": "021_moyen_compte_rendu_CRO_23201117.redacted_raster.pdf",
"success": true,
"time_s": 0.3120863437652588,
"pii_count": 0,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/021_moyen_compte_rendu_CRO_23201117.redacted_raster.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/021_moyen_compte_rendu_CRO_23201117.redacted_raster.audit.jsonl",
"pdf_vector": "tests/ground_truth/pdfs/baseline_anonymized/021_moyen_compte_rendu_CRO_23201117.redacted_raster.redacted_vector.pdf",
"pdf_raster": "tests/ground_truth/pdfs/baseline_anonymized/021_moyen_compte_rendu_CRO_23201117.redacted_raster.redacted_raster.pdf"
}
"success": false,
"time_s": 0.001203298568725586,
"error": "name '_DOCTR_AVAILABLE' is not defined"
},
{
"pdf": "022_moyen_compte_rendu_cro2_516_23187028.pdf",
"success": true,
"time_s": 0.35700511932373047,
"pii_count": 4,
"time_s": 0.3488881587982178,
"pii_count": 3,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/022_moyen_compte_rendu_cro2_516_23187028.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/022_moyen_compte_rendu_cro2_516_23187028.audit.jsonl",
@@ -263,8 +233,8 @@
{
"pdf": "023_complexe_compte_rendu_CRH_23102610.pdf",
"success": true,
"time_s": 2.7280702590942383,
"pii_count": 385,
"time_s": 2.6288418769836426,
"pii_count": 285,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/023_complexe_compte_rendu_CRH_23102610.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/023_complexe_compte_rendu_CRH_23102610.audit.jsonl",
@@ -275,8 +245,8 @@
{
"pdf": "024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.pdf",
"success": true,
"time_s": 5.714028835296631,
"pii_count": 117,
"time_s": 5.795233249664307,
"pii_count": 83,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.audit.jsonl",
@@ -287,8 +257,8 @@
{
"pdf": "025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.pdf",
"success": true,
"time_s": 9.729689836502075,
"pii_count": 270,
"time_s": 10.035075426101685,
"pii_count": 223,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.audit.jsonl",
@@ -299,8 +269,8 @@
{
"pdf": "026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.pdf",
"success": true,
"time_s": 7.467007637023926,
"pii_count": 142,
"time_s": 7.6862921714782715,
"pii_count": 98,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.audit.jsonl",
@@ -311,8 +281,8 @@
{
"pdf": "027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.pdf",
"success": true,
"time_s": 6.15097975730896,
"pii_count": 121,
"time_s": 6.13646674156189,
"pii_count": 87,
"files": {
"text": "tests/ground_truth/pdfs/baseline_anonymized/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.pseudonymise.txt",
"audit": "tests/ground_truth/pdfs/baseline_anonymized/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.audit.jsonl",

View File

@@ -2,11 +2,11 @@
"evaluation_date": "2026-03-02",
"total_documents": 25,
"global_metrics": {
"precision": 0.8827,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.9377,
"true_positives": 1159,
"false_positives": 154,
"f1_score": 1.0,
"true_positives": 899,
"false_positives": 0,
"false_negatives": 0
},
"by_type": {
@@ -18,14 +18,6 @@
"false_positives": 0,
"false_negatives": 0
},
"TEL": {
"precision": 0.9602,
"recall": 1.0,
"f1_score": 0.9797,
"true_positives": 193,
"false_positives": 8,
"false_negatives": 0
},
"NOM": {
"precision": 1.0,
"recall": 1.0,
@@ -43,19 +35,19 @@
"false_negatives": 0
},
"ADRESSE": {
"precision": 0.878,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.9351,
"true_positives": 72,
"false_positives": 10,
"f1_score": 1.0,
"true_positives": 22,
"false_positives": 0,
"false_negatives": 0
},
"CODE_POSTAL": {
"precision": 0.8333,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.9091,
"true_positives": 50,
"false_positives": 10,
"f1_score": 1.0,
"true_positives": 24,
"false_positives": 0,
"false_negatives": 0
},
"DATE_NAISSANCE": {
@@ -83,19 +75,27 @@
"false_negatives": 0
},
"EPISODE": {
"precision": 0.1452,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.2535,
"f1_score": 1.0,
"true_positives": 18,
"false_positives": 106,
"false_positives": 0,
"false_negatives": 0
},
"VILLE": {
"precision": 0.2,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.3333,
"true_positives": 5,
"false_positives": 20,
"f1_score": 1.0,
"true_positives": 3,
"false_positives": 0,
"false_negatives": 0
},
"TEL": {
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 11,
"false_positives": 0,
"false_negatives": 0
},
"AGE": {
@@ -129,7 +129,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 10,
"true_positives": 9,
"false_positives": 0,
"false_negatives": 0
},
@@ -138,7 +138,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 11,
"true_positives": 10,
"false_positives": 0,
"false_negatives": 0
},
@@ -165,17 +165,17 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 62,
"true_positives": 44,
"false_positives": 0,
"false_negatives": 0
},
{
"pdf": "008_simple_trackare_trackare-14004105-23202435_14004105_23202435",
"precision": 0.5769,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.7317,
"true_positives": 15,
"false_positives": 11,
"f1_score": 1.0,
"true_positives": 11,
"false_positives": 0,
"false_negatives": 0
},
{
@@ -192,7 +192,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 13,
"true_positives": 12,
"false_positives": 0,
"false_negatives": 0
},
@@ -210,7 +210,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 30,
"true_positives": 20,
"false_positives": 0,
"false_negatives": 0
},
@@ -219,7 +219,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 32,
"true_positives": 21,
"false_positives": 0,
"false_negatives": 0
},
@@ -246,7 +246,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 114,
"true_positives": 66,
"false_positives": 0,
"false_negatives": 0
},
@@ -264,7 +264,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 123,
"true_positives": 88,
"false_positives": 0,
"false_negatives": 0
},
@@ -273,7 +273,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 55,
"true_positives": 39,
"false_positives": 0,
"false_negatives": 0
},
@@ -300,7 +300,7 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 4,
"true_positives": 3,
"false_positives": 0,
"false_negatives": 0
},
@@ -309,44 +309,44 @@
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"true_positives": 379,
"true_positives": 279,
"false_positives": 0,
"false_negatives": 0
},
{
"pdf": "024_complexe_trackare_trackare-17001141-23066188_17001141_23066188",
"precision": 0.6463,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.7852,
"true_positives": 53,
"false_positives": 29,
"f1_score": 1.0,
"true_positives": 49,
"false_positives": 0,
"false_negatives": 0
},
{
"pdf": "025_complexe_trackare_trackare-02016820-23095226_02016820_23095226",
"precision": 0.6857,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.8136,
"true_positives": 96,
"false_positives": 44,
"f1_score": 1.0,
"true_positives": 93,
"false_positives": 0,
"false_negatives": 0
},
{
"pdf": "026_complexe_trackare_trackare-15000536-23074384_15000536_23074384",
"precision": 0.6695,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.802,
"true_positives": 79,
"false_positives": 39,
"f1_score": 1.0,
"true_positives": 75,
"false_positives": 0,
"false_negatives": 0
},
{
"pdf": "027_complexe_trackare_trackare-10027557-23183041_10027557_23183041",
"precision": 0.6265,
"precision": 1.0,
"recall": 1.0,
"f1_score": 0.7704,
"true_positives": 52,
"false_positives": 31,
"f1_score": 1.0,
"true_positives": 49,
"false_positives": 0,
"false_negatives": 0
}
]