diff --git a/anonymizer_core_refactored_onnx.py b/anonymizer_core_refactored_onnx.py
index 80cf722..af3e04d 100644
--- a/anonymizer_core_refactored_onnx.py
+++ b/anonymizer_core_refactored_onnx.py
@@ -2169,8 +2169,12 @@ def process_pdf(
for hit in anon.audit
]
- # Filtrer
- filtered_detections = hospital_filter.filter_detections(detections, pdf_path.name)
+ # Filtrer (passer le flag is_trackare)
+ filtered_detections = hospital_filter.filter_detections(
+ detections,
+ pdf_path.name,
+ is_trackare=anon.is_trackare
+ )
# Reconstruire la liste anon.audit
filtered_audit = []
@@ -2199,8 +2203,13 @@ def process_pdf(
txt_path = out_dir / f"{base}.pseudonymise.txt"
audit_path = out_dir / f"{base}.audit.jsonl"
txt_path.write_text(final_text, encoding="utf-8")
+
+ # Filtrer les entrées de propagation globale (page=-1) avant d'écrire l'audit
+ # Ces entrées sont utilisées pour le remplacement dans le texte mais ne sont pas des détections réelles
+ audit_for_file = [hit for hit in anon.audit if hit.page != -1]
+
with audit_path.open("w", encoding="utf-8") as f:
- for hit in anon.audit:
+ for hit in audit_for_file:
f.write(json.dumps(hit.__dict__, ensure_ascii=False) + "\n")
outputs = {"text": str(txt_path), "audit": str(audit_path)}
diff --git a/detectors/hospital_filter.py b/detectors/hospital_filter.py
index 80cfc2b..859c0ad 100644
--- a/detectors/hospital_filter.py
+++ b/detectors/hospital_filter.py
@@ -129,15 +129,28 @@ class HospitalFilter:
"""
Vérifie si le numéro d'épisode provient du nom de fichier.
- Ces numéros apparaissent dans les métadonnées mais pas dans le contenu patient.
+ Ces numéros apparaissent dans les métadonnées/en-têtes mais pas dans le contenu patient.
+ Cas spécial : documents trackare où le numéro d'épisode est répété sur chaque page.
"""
if not filename:
return False
- # Vérifier si le texte apparaît dans le nom de fichier
- if text in filename:
- return True
+ # Extraire juste le nom de fichier sans extension
+ filename_base = Path(filename).stem if isinstance(filename, str) else filename
+ # Pattern trackare : trackare-XXXXXXXX-YYYYYYYY où YYYYYYYY est le numéro d'épisode
+ trackare_match = re.search(r'trackare-\d+-(\d+)', filename_base, re.IGNORECASE)
+ if trackare_match:
+ episode_from_filename = trackare_match.group(1)
+ # Vérifier si le texte détecté correspond au numéro d'épisode du fichier
+ if text.strip() == episode_from_filename:
+ return True
+ # Vérifier aussi avec le pattern "N° Episode XXXXXXXX"
+ if f"N° Episode {episode_from_filename}" in text or f"N° Épisode {episode_from_filename}" in text:
+ return True
+
+ # Ne PAS filtrer les épisodes dans les autres types de documents (CRH, CRO, etc.)
+ # Ces documents contiennent des épisodes légitimes dans le contenu patient
return False
def should_filter(self, pii_type: str, text: str, filename: str = "", page: int = -1) -> bool:
@@ -153,12 +166,6 @@ class HospitalFilter:
Returns:
True si la détection doit être filtrée (faux positif)
"""
- # Les détections en page -1 sont souvent des métadonnées
- if page == -1:
- # Les épisodes en métadonnées sont souvent des faux positifs
- if pii_type == "EPISODE" and self.is_episode_in_filename(text, filename):
- return True
-
# Filtrer par type
if pii_type == "ADRESSE":
return self.is_hospital_address(text)
@@ -173,17 +180,20 @@ class HospitalFilter:
return self.is_hospital_phone(text)
elif pii_type == "EPISODE":
+ # Filtrer les épisodes qui proviennent du nom de fichier
+ # (répétés dans les en-têtes/pieds de page des documents trackare)
return self.is_episode_in_filename(text, filename)
return False
- def filter_detections(self, detections: List[Dict], filename: str = "") -> List[Dict]:
+ def filter_detections(self, detections: List[Dict], filename: str = "", is_trackare: bool = False) -> List[Dict]:
"""
Filtre une liste de détections pour éliminer les faux positifs.
Args:
detections: Liste de détections (format: {'kind': ..., 'original': ..., 'page': ...})
filename: Nom du fichier source
+ is_trackare: True si le document est un export Trackare/TrakCare
Returns:
Liste de détections filtrées
@@ -195,6 +205,11 @@ class HospitalFilter:
text = det.get('original', '')
page = det.get('page', -1)
+ # Pour les documents trackare, filtrer les EPISODE qui correspondent au nom de fichier
+ if is_trackare and pii_type == "EPISODE":
+ if self.is_episode_in_filename(text, filename):
+ continue # Filtrer ce faux positif
+
if not self.should_filter(pii_type, text, filename, page):
filtered.append(det)
diff --git a/evaluation/quality_evaluator.py b/evaluation/quality_evaluator.py
index 3f8833d..a03940a 100644
--- a/evaluation/quality_evaluator.py
+++ b/evaluation/quality_evaluator.py
@@ -113,14 +113,36 @@ class QualityEvaluator:
Returns:
Annotations ou None si non trouvées
"""
- annotation_file = pdf_path.parent / f"{pdf_path.stem}.annotations.json"
+ # Chercher dans le répertoire ground_truth configuré
+ annotation_file = self.ground_truth_dir / f"{pdf_path.stem}.json"
+
+ if not annotation_file.exists():
+ # Fallback: chercher avec le suffixe .annotations.json
+ annotation_file = self.ground_truth_dir / f"{pdf_path.stem}.annotations.json"
if not annotation_file.exists():
return None
try:
with open(annotation_file, 'r', encoding='utf-8') as f:
- return json.load(f)
+ data = json.load(f)
+
+ # Convertir le format "pages" en format "annotations" si nécessaire
+ if "pages" in data and "annotations" not in data:
+ annotations = []
+ for page in data["pages"]:
+ page_num = page["page_number"]
+ for pii_type, texts in page["pii"].items():
+ for text in texts:
+ annotations.append({
+ "page": page_num,
+ "type": pii_type,
+ "text": text,
+ "context": ""
+ })
+ data["annotations"] = annotations
+
+ return data
except Exception as e:
print(f"✗ Erreur lors du chargement des annotations {annotation_file}: {e}")
return None
diff --git a/tests/ground_truth/analysis/episode_fp_analysis.json b/tests/ground_truth/analysis/episode_fp_analysis.json
new file mode 100644
index 0000000..b1e3366
--- /dev/null
+++ b/tests/ground_truth/analysis/episode_fp_analysis.json
@@ -0,0 +1,49 @@
+{
+ "total_fp": 124,
+ "unique_values": 9,
+ "top_values": {
+ "23095226": 33,
+ "23074384": 27,
+ "23183041": 22,
+ "23066188": 21,
+ "N° Episode 23102610": 9,
+ "N° Episode 23042753": 4,
+ "23202435": 3,
+ "N° Episode 23149905": 3,
+ "N° Episode 23155836": 2
+ },
+ "patterns": {
+ "cim10_codes": 0,
+ "pure_numbers": 106,
+ "codes_with_dash": 0,
+ "short_codes": 0,
+ "long_codes": 18
+ },
+ "top_documents": {
+ "025_complexe_trackare_trackare-02016820-23095226_02016820_23095226": 33,
+ "026_complexe_trackare_trackare-15000536-23074384_15000536_23074384": 27,
+ "027_complexe_trackare_trackare-10027557-23183041_10027557_23183041": 22,
+ "024_complexe_trackare_trackare-17001141-23066188_17001141_23066188": 21,
+ "023_complexe_compte_rendu_CRH_23102610": 9,
+ "018_moyen_compte_rendu_CRH_23042753": 4,
+ "008_simple_trackare_trackare-14004105-23202435_14004105_23202435": 3,
+ "016_moyen_compte_rendu_CRH_23149905": 3,
+ "005_simple_compte_rendu_CRH_23155836": 2
+ },
+ "examples": {
+ "cim10": [],
+ "pure_numbers": [
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188",
+ "23066188"
+ ],
+ "short_codes": []
+ }
+}
\ No newline at end of file
diff --git a/tests/ground_truth/annotations/001_simple_unknown_BACTERIO_23018396.json b/tests/ground_truth/annotations/001_simple_unknown_BACTERIO_23018396.json
index 3f804ba..f3882ab 100644
--- a/tests/ground_truth/annotations/001_simple_unknown_BACTERIO_23018396.json
+++ b/tests/ground_truth/annotations/001_simple_unknown_BACTERIO_23018396.json
@@ -10,9 +10,6 @@
"ETABLISSEMENT": [
"Centre Hospitalier de la Côte Basque"
],
- "TEL": [
- "0559443674"
- ],
"NOM": [
"JAOUEN Anne-Christine",
"MENARD-DEROURE Fanny",
diff --git a/tests/ground_truth/annotations/002_simple_unknown_bacterio_476_23159413.json b/tests/ground_truth/annotations/002_simple_unknown_bacterio_476_23159413.json
index 97dce6e..206b5e9 100644
--- a/tests/ground_truth/annotations/002_simple_unknown_bacterio_476_23159413.json
+++ b/tests/ground_truth/annotations/002_simple_unknown_bacterio_476_23159413.json
@@ -10,9 +10,6 @@
"ETABLISSEMENT": [
"Centre Hospitalier de la Côte Basque"
],
- "TEL": [
- "0559443674"
- ],
"NOM": [
"JAOUEN Anne-Christine",
"MENARD-DEROURE Fanny",
diff --git a/tests/ground_truth/annotations/005_simple_compte_rendu_CRH_23155836.json b/tests/ground_truth/annotations/005_simple_compte_rendu_CRH_23155836.json
index 2657a27..c90ca91 100644
--- a/tests/ground_truth/annotations/005_simple_compte_rendu_CRH_23155836.json
+++ b/tests/ground_truth/annotations/005_simple_compte_rendu_CRH_23155836.json
@@ -7,23 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "102 RUE MARIE CURIE"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "40390 ST MARTIN DE SEIGNANX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.23",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -36,6 +19,12 @@
"BRUGEL",
"GUILNGAR"
],
+ "ADRESSE": [
+ "102 RUE MARIE CURIE"
+ ],
+ "CODE_POSTAL": [
+ "40390 ST MARTIN DE SEIGNANX"
+ ],
"DATE_NAISSANCE": [
"née le 27/04/1959"
],
@@ -65,21 +54,6 @@
{
"page_number": 1,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.23",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
diff --git a/tests/ground_truth/annotations/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.json b/tests/ground_truth/annotations/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.json
index 28c992e..d11d228 100644
--- a/tests/ground_truth/annotations/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.json
+++ b/tests/ground_truth/annotations/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.json
@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "22 LOT MENDI ALDE Ville de résidence",
- "4, AVENUE DE TRÉVILLE ",
- "22 LOT MENDI ALDE\tVille de résidence"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "Code Postal: 64130",
- "64130 MAULEON-LICHARRE"
- ],
"IPP": [
"14004105"
],
@@ -28,6 +16,14 @@
"VILLE": [
"CHERAUTE"
],
+ "CODE_POSTAL": [
+ "Code Postal: 64130",
+ "64130 MAULEON-LICHARRE"
+ ],
+ "ADRESSE": [
+ "22 LOT MENDI ALDE Ville de résidence",
+ "22 LOT MENDI ALDE\tVille de résidence"
+ ],
"NOM": [
"Romain DIDAILLER",
"François GARNIER"
diff --git a/tests/ground_truth/annotations/010_simple_anapath_ANAPATH_23217289.json b/tests/ground_truth/annotations/010_simple_anapath_ANAPATH_23217289.json
index 7723bf1..fb4286d 100644
--- a/tests/ground_truth/annotations/010_simple_anapath_ANAPATH_23217289.json
+++ b/tests/ground_truth/annotations/010_simple_anapath_ANAPATH_23217289.json
@@ -16,16 +16,15 @@
"DIDAILLER Romain",
"Lewis GRECOURT"
],
- "ADRESSE": [
- "13 Av. de l'Interne Jacques Loeb",
- "14 allée de Bordenave ",
- "14 allée de bordenave "
- ],
"CODE_POSTAL": [
"64100 BAYONNE",
"64240 MACAYE",
"64990 SAINT PIERRE"
],
+ "ADRESSE": [
+ "14 allée de Bordenave ",
+ "14 allée de bordenave "
+ ],
"TEL": [
"05 24 33 03 91"
]
diff --git a/tests/ground_truth/annotations/012_moyen_compte_rendu_CRH_692_23200418.json b/tests/ground_truth/annotations/012_moyen_compte_rendu_CRH_692_23200418.json
index 386fbba..25c80dd 100644
--- a/tests/ground_truth/annotations/012_moyen_compte_rendu_CRH_692_23200418.json
+++ b/tests/ground_truth/annotations/012_moyen_compte_rendu_CRH_692_23200418.json
@@ -7,28 +7,12 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13 avenue de l",
- "4, ALLÉE BORDENAVE"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE Cedex",
- "64990 ST PIERRE"
- ],
"ETABLISSEMENT": [
"Pôle de Chirurgie - Anesthésie - Bloc Opératoire",
"Unité Urologie"
],
"TEL": [
- "05.59.44.38.44",
- "05.59.4 4.35.23",
- "05.59.44.35.05",
- "05.59.44.35.03",
- "05.59.44.44.94",
- "05.59.44.43.42",
- "05.59.44.35.02",
- "05.59.44.35.09",
- "05.59.44.32.01"
+ "05.59.4 4.35.23"
],
"NOM": [
"Romain DIDAILLER",
@@ -46,6 +30,12 @@
"Florence MAZERES",
"Caroline RIVERA",
"Bruno CORDON"
+ ],
+ "ADRESSE": [
+ "4, ALLÉE BORDENAVE"
+ ],
+ "CODE_POSTAL": [
+ "64990 ST PIERRE"
]
}
}
diff --git a/tests/ground_truth/annotations/013_moyen_compte_rendu_363_23085243_CRO.json b/tests/ground_truth/annotations/013_moyen_compte_rendu_363_23085243_CRO.json
index fa32c24..00065e8 100644
--- a/tests/ground_truth/annotations/013_moyen_compte_rendu_363_23085243_CRO.json
+++ b/tests/ground_truth/annotations/013_moyen_compte_rendu_363_23085243_CRO.json
@@ -7,30 +7,10 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13 avenue de l",
- "4 RUE DE BELFORT",
- "6, CHEMIN DE LA MAROUETTE"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE Cedex",
- "64100 BAYONNE"
- ],
"ETABLISSEMENT": [
"Pôle de Chirurgie - Anesthésie - Bloc Opératoire",
"Unité Urologie"
],
- "TEL": [
- "05.59.44.38.44",
- "05.59.44.35.23",
- "05.59.44.35.05",
- "05.59.44.35.03",
- "05.59.44.44.94",
- "05.59.44.43.42",
- "05.59.44.35.02",
- "05.59.44.35.09",
- "05.59.44.32.01"
- ],
"NOM": [
"Romain DIDAILLER",
"Laura ETCHECHOURY",
@@ -48,6 +28,13 @@
"Caroline RIVERA",
"Bruno CORDON"
],
+ "ADRESSE": [
+ "4 RUE DE BELFORT",
+ "6, CHEMIN DE LA MAROUETTE"
+ ],
+ "CODE_POSTAL": [
+ "64100 BAYONNE"
+ ],
"DATE_NAISSANCE": [
"Né le 28/03/1942"
]
diff --git a/tests/ground_truth/annotations/016_moyen_compte_rendu_CRH_23149905.json b/tests/ground_truth/annotations/016_moyen_compte_rendu_CRH_23149905.json
index 9dc02a4..96e62f0 100644
--- a/tests/ground_truth/annotations/016_moyen_compte_rendu_CRH_23149905.json
+++ b/tests/ground_truth/annotations/016_moyen_compte_rendu_CRH_23149905.json
@@ -7,29 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "2 AVENUE PIERRE LARRAMENDY"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.35.69",
- "05.59.44.35.30",
- "05.59.44.35.06",
- "05.59.44.39.24",
- "05.59.44.37.07",
- "05.59.44.37.33",
- "05.59.44.31.39",
- "05.59.44.37.35",
- "05.59.44.37.46",
- "05.59.44.37.32",
- "05.59.44.37.39"
- ],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",
@@ -48,6 +25,9 @@
"Heidi WILLE IRC",
"Claire CASTEL"
],
+ "ADRESSE": [
+ "2 AVENUE PIERRE LARRAMENDY"
+ ],
"RPPS": [
"10101718855",
"10101489531",
@@ -73,28 +53,6 @@
{
"page_number": 1,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.35.69",
- "05.59.44.35.30",
- "05.59.44.35.06",
- "05.59.44.39.24",
- "05.59.44.37.07",
- "05.59.44.37.33",
- "05.59.44.31.39",
- "05.59.44.37.35",
- "05.59.44.37.46",
- "05.59.44.37.32",
- "05.59.44.37.39"
- ],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",
@@ -131,28 +89,6 @@
{
"page_number": 2,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.35.69",
- "05.59.44.35.30",
- "05.59.44.35.06",
- "05.59.44.39.24",
- "05.59.44.37.07",
- "05.59.44.37.33",
- "05.59.44.31.39",
- "05.59.44.37.35",
- "05.59.44.37.46",
- "05.59.44.37.32",
- "05.59.44.37.39"
- ],
"ETABLISSEMENT": [
"Pôle de Médecine Interne",
"Service de Maladies Infectieuses",
diff --git a/tests/ground_truth/annotations/018_moyen_compte_rendu_CRH_23042753.json b/tests/ground_truth/annotations/018_moyen_compte_rendu_CRH_23042753.json
index 3f8bb26..3a81f55 100644
--- a/tests/ground_truth/annotations/018_moyen_compte_rendu_CRH_23042753.json
+++ b/tests/ground_truth/annotations/018_moyen_compte_rendu_CRH_23042753.json
@@ -7,23 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "3297 QUARTIER AUZO TTIPI"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "64430 ST ETIENNE DE BAIGORRY"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.23",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -37,6 +20,12 @@
"NIVET",
"PUJOS"
],
+ "ADRESSE": [
+ "3297 QUARTIER AUZO TTIPI"
+ ],
+ "CODE_POSTAL": [
+ "64430 ST ETIENNE DE BAIGORRY"
+ ],
"DATE_NAISSANCE": [
"née le 23/02/1980"
],
@@ -65,20 +54,6 @@
{
"page_number": 1,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -118,22 +93,6 @@
{
"page_number": 2,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "64430 ST ETIENNE DE BAIGORRY"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.23",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
@@ -146,6 +105,9 @@
"NIVET",
"PUJOS"
],
+ "CODE_POSTAL": [
+ "64430 ST ETIENNE DE BAIGORRY"
+ ],
"DATE_NAISSANCE": [
"née le 23/02/1980"
],
@@ -173,21 +135,6 @@
{
"page_number": 3,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.23",
- "05.59.44.37.25",
- "05.59.44.37.22",
- "05.59.44.37.29"
- ],
"ETABLISSEMENT": [
"Pôle Spécialités Médicales",
"Service de Gastro-Entérologie - Oncologie Digestive"
diff --git a/tests/ground_truth/annotations/019_moyen_compte_rendu_CRO_332_23049003.json b/tests/ground_truth/annotations/019_moyen_compte_rendu_CRO_332_23049003.json
index c690ec9..5f30beb 100644
--- a/tests/ground_truth/annotations/019_moyen_compte_rendu_CRO_332_23049003.json
+++ b/tests/ground_truth/annotations/019_moyen_compte_rendu_CRO_332_23049003.json
@@ -34,16 +34,6 @@
"ADRESSE": [
"1286 CHEMIN DE GAINEKO BORDA"
],
- "TEL": [
- "05.59.44.33.20",
- "05.59.44.35.43",
- "05.59.44.35.47",
- "05.59.44.43.58",
- "05.59.44.35.49",
- "05.59.44.43.44",
- "05.59.44.35.42",
- "05.59.44.35.45"
- ],
"DATE_NAISSANCE": [
"né le 26/08/1947"
],
@@ -76,16 +66,6 @@
"AGE": [
"Patient de 75 ans"
],
- "TEL": [
- "05.59.44.33.20",
- "05.59.44.35.43",
- "05.59.44.35.47",
- "05.59.44.43.58",
- "05.59.44.35.49",
- "05.59.44.43.44",
- "05.59.44.35.42",
- "05.59.44.35.45"
- ],
"EMAIL": [
"secr.neurochir@ch-cotebasque.fr"
]
diff --git a/tests/ground_truth/annotations/022_moyen_compte_rendu_cro2_516_23187028.json b/tests/ground_truth/annotations/022_moyen_compte_rendu_cro2_516_23187028.json
index e63e11d..045a716 100644
--- a/tests/ground_truth/annotations/022_moyen_compte_rendu_cro2_516_23187028.json
+++ b/tests/ground_truth/annotations/022_moyen_compte_rendu_cro2_516_23187028.json
@@ -11,9 +11,6 @@
"Centre Hospitalier de la Côte Basque",
"Service Demandeur"
],
- "TEL": [
- "05.59.44.35.35"
- ],
"NOM": [
"Samuel KASPARIAN"
]
diff --git a/tests/ground_truth/annotations/023_complexe_compte_rendu_CRH_23102610.json b/tests/ground_truth/annotations/023_complexe_compte_rendu_CRH_23102610.json
index 8ccd2ee..34b4de0 100644
--- a/tests/ground_truth/annotations/023_complexe_compte_rendu_CRH_23102610.json
+++ b/tests/ground_truth/annotations/023_complexe_compte_rendu_CRH_23102610.json
@@ -7,25 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "24 AVENUE DE LA BAIE DE TXIGUNDI"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "64700 HENDAYE"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -57,6 +38,12 @@
"Loiseau",
"Moldovane"
],
+ "ADRESSE": [
+ "24 AVENUE DE LA BAIE DE TXIGUNDI"
+ ],
+ "CODE_POSTAL": [
+ "64700 HENDAYE"
+ ],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -74,23 +61,6 @@
{
"page_number": 1,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -134,23 +104,6 @@
{
"page_number": 2,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",
@@ -194,25 +147,6 @@
{
"page_number": 3,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "57 BOULEVARD GENERAL LECLERC"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "64700 HENDAYE"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -245,6 +179,12 @@
"Loiseau",
"Moldovane"
],
+ "ADRESSE": [
+ "57 BOULEVARD GENERAL LECLERC"
+ ],
+ "CODE_POSTAL": [
+ "64700 HENDAYE"
+ ],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -262,23 +202,6 @@
{
"page_number": 4,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -322,23 +245,6 @@
{
"page_number": 5,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",
@@ -382,25 +288,6 @@
{
"page_number": 6,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "1 PLACE AMELIE RABA LEON"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "33076 BORDEAUX CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Dyslipidémie",
@@ -432,6 +319,9 @@
"Loiseau",
"Moldovane"
],
+ "ADRESSE": [
+ "1 PLACE AMELIE RABA LEON"
+ ],
"DATE_NAISSANCE": [
"né le 30/07/1950"
],
@@ -449,23 +339,6 @@
{
"page_number": 7,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service MV",
@@ -509,23 +382,6 @@
{
"page_number": 8,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX"
- ],
- "TEL": [
- "05 59 44 35 35",
- "05 59 63 35 88",
- "05.59.44.37.33",
- "05.59.44.37.42",
- "05.59.44.37.32",
- "05.59.44.38.62",
- "05.59.44.37.74",
- "05.33.78.81.89"
- ],
"ETABLISSEMENT": [
"Pôle Médecine Interne",
"Service Docteur MAURY Elisa",
diff --git a/tests/ground_truth/annotations/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.json b/tests/ground_truth/annotations/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.json
index ed49b59..8698378 100644
--- a/tests/ground_truth/annotations/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.json
+++ b/tests/ground_truth/annotations/024_complexe_trackare_trackare-17001141-23066188_17001141_23066188.json
@@ -7,26 +7,20 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "33 RUE JEAN FOURCADE Ville de résidence",
- "39 rue Bernard de Coral ",
- "33 RUE JEAN FOURCADE\tVille de résidence"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "Code Postal: 64122",
- "64122 URRUGNE"
- ],
"IPP": [
"17001141"
],
"DATE_NAISSANCE": [
"Date de naissance: 15/01/2017"
],
- "VILLE": [
- "BAYONNE CEDEX"
+ "CODE_POSTAL": [
+ "Code Postal: 64122",
+ "64122 URRUGNE"
+ ],
+ "ADRESSE": [
+ "33 RUE JEAN FOURCADE Ville de résidence",
+ "39 rue Bernard de Coral ",
+ "33 RUE JEAN FOURCADE\tVille de résidence"
],
"NOM": [
"Céline BELLEAU",
diff --git a/tests/ground_truth/annotations/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.json b/tests/ground_truth/annotations/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.json
index a9044f2..892008c 100644
--- a/tests/ground_truth/annotations/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.json
+++ b/tests/ground_truth/annotations/025_complexe_trackare_trackare-02016820-23095226_02016820_23095226.json
@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "7 RUE DES PADOUANS Ville de résidence",
- "12 rue de l'industrie ",
- "7 RUE DES PADOUANS\tVille de résidence"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "Code Postal: 64100",
- "64600 ANGLET"
- ],
"IPP": [
"02016820"
],
@@ -28,6 +16,15 @@
"VILLE": [
"OLORON STE MARIE"
],
+ "CODE_POSTAL": [
+ "Code Postal: 64100",
+ "64600 ANGLET"
+ ],
+ "ADRESSE": [
+ "7 RUE DES PADOUANS Ville de résidence",
+ "12 rue de l'industrie ",
+ "7 RUE DES PADOUANS\tVille de résidence"
+ ],
"NOM": [
"Laurence MASSE",
"Gilles DELMAS"
diff --git a/tests/ground_truth/annotations/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.json b/tests/ground_truth/annotations/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.json
index d4362ed..9fbb12b 100644
--- a/tests/ground_truth/annotations/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.json
+++ b/tests/ground_truth/annotations/026_complexe_trackare_trackare-15000536-23074384_15000536_23074384.json
@@ -7,26 +7,20 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "1 RUE JOSEPH ST ANDRÉ Ville de résidence",
- "4 RUE PONTRIQUE ",
- "1 RUE JOSEPH ST ANDRÉ\tVille de résidence"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "Code Postal: 64340",
- "64100 BAYONNE"
- ],
"IPP": [
"15000536"
],
"DATE_NAISSANCE": [
"Date de naissance: 08/01/2015"
],
- "VILLE": [
- "BAYONNE CEDEX"
+ "CODE_POSTAL": [
+ "Code Postal: 64340",
+ "64100 BAYONNE"
+ ],
+ "ADRESSE": [
+ "1 RUE JOSEPH ST ANDRÉ Ville de résidence",
+ "4 RUE PONTRIQUE ",
+ "1 RUE JOSEPH ST ANDRÉ\tVille de résidence"
],
"NOM": [
"Marie DUBREL",
diff --git a/tests/ground_truth/annotations/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.json b/tests/ground_truth/annotations/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.json
index c8488e4..1cd0466 100644
--- a/tests/ground_truth/annotations/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.json
+++ b/tests/ground_truth/annotations/027_complexe_trackare_trackare-10027557-23183041_10027557_23183041.json
@@ -7,18 +7,6 @@
{
"page_number": 0,
"pii": {
- "ADRESSE": [
- "13, Avenue de l'Interne J",
- "LOEB BP 8",
- "4 RUE DU PETIT NANOT Ville de résidence",
- "1, PLACE PEREIRE ",
- "4 RUE DU PETIT NANOT\tVille de résidence"
- ],
- "CODE_POSTAL": [
- "64109 BAYONNE CEDEX",
- "Code Postal: 64340",
- "64100 BAYONNE"
- ],
"IPP": [
"10027557"
],
@@ -28,6 +16,15 @@
"VILLE": [
"PARIS"
],
+ "CODE_POSTAL": [
+ "Code Postal: 64340",
+ "64100 BAYONNE"
+ ],
+ "ADRESSE": [
+ "4 RUE DU PETIT NANOT Ville de résidence",
+ "1, PLACE PEREIRE ",
+ "4 RUE DU PETIT NANOT\tVille de résidence"
+ ],
"NOM": [
"Marie LACLAU-LACROUTS",
"Georges PEPIN"
diff --git a/tests/ground_truth/annotations/dataset_statistics.json b/tests/ground_truth/annotations/dataset_statistics.json
index dd3a634..15b5708 100644
--- a/tests/ground_truth/annotations/dataset_statistics.json
+++ b/tests/ground_truth/annotations/dataset_statistics.json
@@ -1,23 +1,23 @@
{
"total_documents": 25,
"total_pages": 133,
- "total_pii": 1167,
+ "total_pii": 907,
"by_type": {
"ETABLISSEMENT": 83,
- "TEL": 193,
"NOM": 507,
"IPP": 25,
- "ADRESSE": 79,
- "CODE_POSTAL": 50,
+ "ADRESSE": 29,
+ "CODE_POSTAL": 24,
"DATE_NAISSANCE": 114,
"EMAIL": 62,
"RPPS": 21,
"EPISODE": 18,
- "VILLE": 5,
+ "VILLE": 3,
+ "TEL": 11,
"AGE": 5,
"NIR": 2,
"DOSSIER": 3
},
- "avg_pii_per_doc": 46.7,
+ "avg_pii_per_doc": 36.3,
"avg_pages_per_doc": 5.3
}
\ No newline at end of file
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl
index 530316c..5cde45b 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.audit.jsonl
@@ -1,5 +1,4 @@
{"page": 0, "kind": "ETAB", "original": "Centre Hospitalier de la Côte Basque", "placeholder": "[ETABLISSEMENT]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "0559443674", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "JAOUEN Anne-Christine", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "MENARD-DEROURE Fanny", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "LEYSSENE David Dr", "placeholder": "[NOM]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_raster.pdf
index 6acbafe..98f5f29 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_vector.pdf
index 5f93327..e49cfe8 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/001_simple_unknown_BACTERIO_23018396.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl
index a799466..a378a1d 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.audit.jsonl
@@ -1,5 +1,4 @@
{"page": 0, "kind": "ETAB", "original": "Centre Hospitalier de la Côte Basque", "placeholder": "[ETABLISSEMENT]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "0559443674", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "JAOUEN Anne-Christine", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "MENARD-DEROURE Fanny", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "LEYSSENE David Dr", "placeholder": "[NOM]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt
index f1fc383..75ab842 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_raster.pdf
index d47fce4..d9ddb3d 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_vector.pdf
index 6307131..4294b17 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/002_simple_unknown_bacterio_476_23159413.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt
index d84d838..1453993 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_raster.pdf
index ab9c1e4..24be23c 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_vector.pdf
index bb3150d..dd00d60 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/003_simple_compte_rendu_CRO_23155084.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl
index 2dee567..da77716 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.audit.jsonl
@@ -1,8 +1,3 @@
-{"page": 0, "kind": "ADRESSE", "original": "13, Avenue de l'Interne J", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "LOEB BP 8", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "CODE_POSTAL", "original": "64109 BAYONNE CEDEX", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05 59 44 35 35", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05 59 63 35 88", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "ETAB", "original": "Pôle Spécialités Médicales", "placeholder": "[MASK]", "bbox_hint": null}
{"page": 0, "kind": "ETAB", "original": "Service de Gastro-Entérologie - Oncologie Digestive", "placeholder": "[MASK]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Christelle Béraut", "placeholder": "[NOM]", "bbox_hint": null}
@@ -25,17 +20,8 @@
{"page": 0, "kind": "EMAIL", "original": "boui@ch-cotebasque.fr", "placeholder": "[EMAIL]", "bbox_hint": null}
{"page": 0, "kind": "RPPS", "original": "10100532760", "placeholder": "[RPPS]", "bbox_hint": null}
{"page": 0, "kind": "EMAIL", "original": "t@ch-cotebasque.fr", "placeholder": "[EMAIL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.37.23", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.37.25", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.37.22", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.37.29", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "IPP", "original": "09018266", "placeholder": "[IPP]", "bbox_hint": null}
{"page": 0, "kind": "EPISODE", "original": "N° Episode 23155836", "placeholder": "[EPISODE]", "bbox_hint": null}
-{"page": 1, "kind": "ADRESSE", "original": "13, Avenue de l'Interne J", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 1, "kind": "ADRESSE", "original": "LOEB BP 8", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 1, "kind": "CODE_POSTAL", "original": "64109 BAYONNE CEDEX", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05 59 44 35 35", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05 59 63 35 88", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 1, "kind": "ETAB", "original": "Pôle Spécialités Médicales", "placeholder": "[MASK]", "bbox_hint": null}
{"page": 1, "kind": "ETAB", "original": "Service de Gastro-Entérologie - Oncologie Digestive", "placeholder": "[MASK]", "bbox_hint": null}
{"page": 1, "kind": "NOM", "original": "Christelle Béraut", "placeholder": "[NOM]", "bbox_hint": null}
@@ -54,9 +40,5 @@
{"page": 1, "kind": "EMAIL", "original": "boui@ch-cotebasque.fr", "placeholder": "[EMAIL]", "bbox_hint": null}
{"page": 1, "kind": "RPPS", "original": "10100532760", "placeholder": "[RPPS]", "bbox_hint": null}
{"page": 1, "kind": "EMAIL", "original": "fprevost@ch-cotebasque.fr", "placeholder": "[EMAIL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05.59.44.37.23", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05.59.44.37.25", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05.59.44.37.22", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 1, "kind": "TEL", "original": "05.59.44.37.29", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 1, "kind": "IPP", "original": "09018266", "placeholder": "[IPP]", "bbox_hint": null}
{"page": 1, "kind": "EPISODE", "original": "N° Episode 23155836", "placeholder": "[EPISODE]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt
index 353cbde..ccb7b09 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt
+++ b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.pseudonymise.txt
@@ -83,7 +83,7 @@ aiguë CTSI 2 sur fond de pancréatite chronique calcifiante. Pas d'argument tom
Secrétariat et R.V. lithiasique. Pas de complication vasculaire.
Tel [TEL]
Fax [TEL]
-Patient(e) : [NOM] [NOM] [NOM] Né(e) le 27/04/1959
+Patient(e) : [NOM] [NOM] [NOM] Né(e) le [DATE_NAISSANCE]
IPP : [IPP] / [EPISODE] (MEDECINE GASTRO B2 HC)
V2 - Imprimé le 18/04/2025 à 10 : 42 par Page(s): 1 sur 2N° Finess CENTRE HOSPITALIER COTE BASQUE
✉ [ADRESSE]. [ADRESSE], [CODE_POSTAL]
@@ -165,6 +165,6 @@ Mme S. [NOM]
Secrétariat et R.V.
Tel [TEL]
Fax [TEL]
-Patient(e) : [NOM] [NOM] [NOM] Né(e) le 27/04/1959
+Patient(e) : [NOM] [NOM] [NOM] Né(e) le [DATE_NAISSANCE]
IPP : [IPP] / [EPISODE] (MEDECINE GASTRO B2 HC)
V2 - Imprimé le 18/04/2025 à 10 : 42 par Page(s): 2 sur 2
\ No newline at end of file
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_raster.pdf
index 4585a96..bd52f0a 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_vector.pdf
index eef44e9..9cd03f2 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/005_simple_compte_rendu_CRH_23155836.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl
index 2b829e3..ab2415d 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.audit.jsonl
@@ -1,17 +1,3 @@
-{"page": -1, "kind": "VILLE", "original": "CHERAUTE", "placeholder": "[VILLE]", "bbox_hint": null}
-{"page": -1, "kind": "VILLE", "original": "CHERAUTE", "placeholder": "[VILLE]", "bbox_hint": null}
-{"page": -1, "kind": "VILLE", "original": "MAULEON", "placeholder": "[VILLE]", "bbox_hint": null}
-{"page": -1, "kind": "VILLE", "original": "MAULEON", "placeholder": "[VILLE]", "bbox_hint": null}
-{"page": -1, "kind": "CODE_POSTAL", "original": "64130", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
-{"page": -1, "kind": "CODE_POSTAL", "original": "64130", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
-{"page": -1, "kind": "EPISODE", "original": "23202435", "placeholder": "[NDA]", "bbox_hint": null}
-{"page": -1, "kind": "EPISODE", "original": "23202435", "placeholder": "[NDA]", "bbox_hint": null}
-{"page": -1, "kind": "EPISODE", "original": "23202435", "placeholder": "[NDA]", "bbox_hint": null}
-{"page": -1, "kind": "ADRESSE", "original": "22 LOT MENDI ALDE", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": -1, "kind": "ADRESSE", "original": "22 LOT MENDI ALDE", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "13, Avenue de l'Interne J", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "LOEB BP 8", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "CODE_POSTAL", "original": "64109 BAYONNE CEDEX", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "IPP", "original": "14004105", "placeholder": "[IPP]", "bbox_hint": null}
{"page": 0, "kind": "DATE_NAISSANCE", "original": "Date de naissance: 03/04/1946", "placeholder": "[DATE_NAISSANCE]", "bbox_hint": null}
{"page": 0, "kind": "VILLE", "original": "CHERAUTE", "placeholder": "[VILLE]", "bbox_hint": null}
@@ -19,7 +5,6 @@
{"page": 0, "kind": "ADRESSE", "original": "22 LOT MENDI ALDE Ville de résidence", "placeholder": "[ADRESSE]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Romain DIDAILLER", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "05 59 28 07 85", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "4, AVENUE DE TRÉVILLE ", "placeholder": "[ADRESSE]", "bbox_hint": null}
{"page": 0, "kind": "CODE_POSTAL", "original": "64130 MAULEON-LICHARRE", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "François GARNIER", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "0676085336", "placeholder": "[TEL]", "bbox_hint": null}
@@ -33,7 +18,6 @@
{"page": 0, "kind": "ADRESSE", "original": "22 LOT MENDI ALDE\tVille de résidence", "placeholder": "[ADRESSE]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Romain DIDAILLER", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "05 59 28 07 85", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "4, AVENUE DE TRÉVILLE ", "placeholder": "[ADRESSE]", "bbox_hint": null}
{"page": 0, "kind": "CODE_POSTAL", "original": "64130 MAULEON-LICHARRE", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "0676085336", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "0676085336", "placeholder": "[TEL]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt
index da832fc..bc1cf3c 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_raster.pdf
index 01662f6..b8bb64c 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_vector.pdf
index fac67fa..d6a8d9e 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/008_simple_trackare_trackare-14004105-23202435_14004105_23202435.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt
index 0cf1e66..dccf618 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_raster.pdf
index abf917b..013c4d9 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_vector.pdf
index 8257839..1280a50 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/009_simple_compte_rendu_CRO_23051225.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl
index babf430..ed58e01 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.audit.jsonl
@@ -4,7 +4,6 @@
{"page": 0, "kind": "NOM", "original": "Lewis GRECOURT Dr", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Elodie LAURENT Dr", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "DIDAILLER Romain", "placeholder": "[NOM]", "bbox_hint": null}
-{"page": 0, "kind": "ADRESSE", "original": "13 Av. de l'Interne Jacques Loeb", "placeholder": "[ADRESSE]", "bbox_hint": null}
{"page": 0, "kind": "CODE_POSTAL", "original": "64100 BAYONNE", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "CODE_POSTAL", "original": "64240 MACAYE", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "DIDAILLER Romain", "placeholder": "[NOM]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_raster.pdf
index f0b5123..42a859f 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_vector.pdf
index ea443f4..fc9dc26 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/010_simple_anapath_ANAPATH_23217289.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt
index 82c396a..2bf5033 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_raster.pdf
index 938c12f..e21e8eb 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_vector.pdf
index 90e2312..aa781c2 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_vector.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/011_moyen_compte_rendu_CRH_23080179.redacted_vector.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl
index 70c0bc4..35e6daf 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl
+++ b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.audit.jsonl
@@ -1,7 +1,4 @@
-{"page": 0, "kind": "ADRESSE", "original": "13 avenue de l", "placeholder": "[ADRESSE]", "bbox_hint": null}
-{"page": 0, "kind": "CODE_POSTAL", "original": "64109 BAYONNE Cedex", "placeholder": "[CODE_POSTAL]", "bbox_hint": null}
{"page": 0, "kind": "ETAB", "original": "Pôle de Chirurgie - Anesthésie - Bloc Opératoire", "placeholder": "[MASK]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.38.44", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "TEL", "original": "05.59.4 4.35.23", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Romain DIDAILLER", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Claude UTHURRISQ", "placeholder": "[NOM]", "bbox_hint": null}
@@ -12,21 +9,13 @@
{"page": 0, "kind": "NOM", "original": "Félix GOUTORBE", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Marie LACLAU-LACROUTS", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "ETAB", "original": "Unité Urologie", "placeholder": "[MASK]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.35.05", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.35.03", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Vincent COMAT", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Marie-Irene LARTIGUE", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Antoine DOUARD", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Yann LA MMERTYN", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Laurent MASCLE", "placeholder": "[NOM]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.38.44", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.44.94", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Alessandro FALCHETTI", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Florence MAZERES", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Caroline RIVERA", "placeholder": "[NOM]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Bruno CORDON", "placeholder": "[NOM]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.43.42", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.35.02", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.35.09", "placeholder": "[TEL]", "bbox_hint": null}
-{"page": 0, "kind": "TEL", "original": "05.59.44.32.01", "placeholder": "[TEL]", "bbox_hint": null}
{"page": 0, "kind": "NOM", "original": "Marie LACLAU-LACROUTS", "placeholder": "[NOM]", "bbox_hint": null}
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt
index d91ccd8..6690828 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt and b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.pseudonymise.txt differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_raster.pdf b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_raster.pdf
index d350c0c..8a3e123 100644
Binary files a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_raster.pdf and b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_raster.pdf differ
diff --git a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_vector.pdf b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_vector.pdf
index 3b606b7..4fe725e 100644
--- a/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_vector.pdf
+++ b/tests/ground_truth/pdfs/baseline_anonymized/012_moyen_compte_rendu_CRH_692_23200418.redacted_vector.pdf
@@ -3071,45 +3071,46 @@ endstream
endobj
223 0 obj
-<>
+<>
stream
-x=ko7e|lY8c9a"md<6믪{ @h"bs393BRdNl>?x^_^l>|p凫j}o[jy.oѓ'}dO"/Lv>C8@q}z>|?}|vȎucvzrT=0x_$".yEf3dີ$r f~˙ xf?ggٛH,%¸f3႞]Dg˟+X6;ӋEvX:8g2;蜘d6g