feat: traitement des sous-dossiers patients avec sorties miroir
Permet d'organiser les PDFs en sous-répertoires (un niveau) dans le dossier d'entrée. Les sorties reflètent cette structure dans output/. Les PDFs à la racine continuent de fonctionner comme avant.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
44
src/main.py
44
src/main.py
@@ -98,15 +98,24 @@ def write_outputs(
|
|||||||
anonymized_text: str,
|
anonymized_text: str,
|
||||||
dossier: DossierMedical,
|
dossier: DossierMedical,
|
||||||
report: AnonymizationReport,
|
report: AnonymizationReport,
|
||||||
|
subdir: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Écrit les fichiers de sortie."""
|
"""Écrit les fichiers de sortie."""
|
||||||
|
anon_dir = ANONYMIZED_DIR / subdir if subdir else ANONYMIZED_DIR
|
||||||
|
struct_dir = STRUCTURED_DIR / subdir if subdir else STRUCTURED_DIR
|
||||||
|
rep_dir = REPORTS_DIR / subdir if subdir else REPORTS_DIR
|
||||||
|
|
||||||
|
anon_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
struct_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
rep_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Texte anonymisé
|
# Texte anonymisé
|
||||||
anon_path = ANONYMIZED_DIR / f"{stem}_anonymized.txt"
|
anon_path = anon_dir / f"{stem}_anonymized.txt"
|
||||||
anon_path.write_text(anonymized_text, encoding="utf-8")
|
anon_path.write_text(anonymized_text, encoding="utf-8")
|
||||||
logger.info(" → %s", anon_path)
|
logger.info(" → %s", anon_path)
|
||||||
|
|
||||||
# JSON structuré
|
# JSON structuré
|
||||||
json_path = STRUCTURED_DIR / f"{stem}_cim10.json"
|
json_path = struct_dir / f"{stem}_cim10.json"
|
||||||
json_path.write_text(
|
json_path.write_text(
|
||||||
dossier.model_dump_json(indent=2, exclude_none=True),
|
dossier.model_dump_json(indent=2, exclude_none=True),
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
@@ -114,7 +123,7 @@ def write_outputs(
|
|||||||
logger.info(" → %s", json_path)
|
logger.info(" → %s", json_path)
|
||||||
|
|
||||||
# Rapport d'anonymisation
|
# Rapport d'anonymisation
|
||||||
report_path = REPORTS_DIR / f"{stem}_report.json"
|
report_path = rep_dir / f"{stem}_report.json"
|
||||||
report_path.write_text(
|
report_path.write_text(
|
||||||
report.model_dump_json(indent=2),
|
report.model_dump_json(indent=2),
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
@@ -164,25 +173,44 @@ def main(input_path: str | None = None) -> None:
|
|||||||
_use_rag = False
|
_use_rag = False
|
||||||
|
|
||||||
input_p = Path(args.input)
|
input_p = Path(args.input)
|
||||||
|
|
||||||
|
# Collecte des groupes (pdfs, subdir) à traiter
|
||||||
|
groups: list[tuple[list[Path], str | None]] = []
|
||||||
|
|
||||||
if input_p.is_file():
|
if input_p.is_file():
|
||||||
pdfs = [input_p]
|
groups.append(([input_p], None))
|
||||||
elif input_p.is_dir():
|
elif input_p.is_dir():
|
||||||
pdfs = sorted(input_p.glob("*.pdf"))
|
# PDFs à la racine
|
||||||
|
root_pdfs = sorted(input_p.glob("*.pdf"))
|
||||||
|
if root_pdfs:
|
||||||
|
groups.append((root_pdfs, None))
|
||||||
|
|
||||||
|
# Sous-dossiers directs (un seul niveau)
|
||||||
|
for child in sorted(input_p.iterdir()):
|
||||||
|
if child.is_dir():
|
||||||
|
sub_pdfs = sorted(child.glob("*.pdf"))
|
||||||
|
if sub_pdfs:
|
||||||
|
groups.append((sub_pdfs, child.name))
|
||||||
else:
|
else:
|
||||||
logger.error("Chemin introuvable : %s", input_p)
|
logger.error("Chemin introuvable : %s", input_p)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if not pdfs:
|
total = sum(len(pdfs) for pdfs, _ in groups)
|
||||||
|
if total == 0:
|
||||||
logger.warning("Aucun PDF trouvé dans %s", input_p)
|
logger.warning("Aucun PDF trouvé dans %s", input_p)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
logger.info("Traitement de %d PDF(s)...", len(pdfs))
|
logger.info("Traitement de %d PDF(s)...", total)
|
||||||
|
|
||||||
|
for pdfs, subdir in groups:
|
||||||
|
if subdir:
|
||||||
|
logger.info("--- Dossier %s (%d PDFs) ---", subdir, len(pdfs))
|
||||||
|
|
||||||
for pdf_path in pdfs:
|
for pdf_path in pdfs:
|
||||||
try:
|
try:
|
||||||
anonymized_text, dossier, report = process_pdf(pdf_path)
|
anonymized_text, dossier, report = process_pdf(pdf_path)
|
||||||
stem = pdf_path.stem.replace(" ", "_")
|
stem = pdf_path.stem.replace(" ", "_")
|
||||||
write_outputs(stem, anonymized_text, dossier, report)
|
write_outputs(stem, anonymized_text, dossier, report, subdir=subdir)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Erreur lors du traitement de %s", pdf_path.name)
|
logger.exception("Erreur lors du traitement de %s", pdf_path.name)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user