Files
Dom b47f5c47e0 feat(schema): module de nettoyage des JSONs pour consommation aval
Le pipeline produit un JSON riche pendant l'exécution (ratios
checkbox, OCR raw, flags _parse_error/_truncated_loop/_crop_recodage,
_source, _elapsed_s…). Utile en audit, mais pollue quand on veut
exposer le résultat à un consommateur aval (Excel, dashboard, API).

pipeline/schema.py :
- SCHEMA_VERSION "2.0"
- clean_dossier(raw) : retourne une copie propre avec structure stable
  (en-tête → codage → GHM/GHS → décisions) et validation ATIH en
  format compact (summary + cross_checks + flags par champ).
- CLEAN_FIELDS_RECUEIL / CLEAN_FIELDS_CONCERTATION_{1,2} / CLEAN_FIELDS_PREUVES
  documentent les champs stables par type de page.
- CLI : `python -m pipeline.schema` → nettoie `output/v2/*.json` vers
  `output/v2_clean/`.

Séparation claire : `output/v2/` reste le JSON raw (audit), `output/v2_clean/`
est la sortie propre et stable pour livrables.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 15:54:50 +02:00

272 lines
7.1 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"fichier": "OGC 74",
"pdf_hash": "58076293464e9771",
"schema_version": "2.0",
"extraction": {
"recueil": {
"etablissement": "CLINIQUE D'ARCACHON",
"finess": "330780206",
"date_debut_controle": "13/02/2018",
"n_ogc": "74",
"n_champ": "1",
"dates_sejour": "08/04/2016 au 13/04/2016",
"sejour_etab": {
"age": "52",
"sexe": "1",
"duree_sejour": "5",
"mode_entree": "1",
"provenance": "8",
"mode_sortie": "1",
"destination": "8"
},
"sejour_reco": {
"age": "52",
"sexe": "1",
"duree_sejour": "5",
"mode_entree": "1",
"provenance": "8",
"mode_sortie": "1",
"destination": "8"
},
"rum_etab": {
"um": "40 C",
"igs": "II",
"duree": "5",
"dates": "du 08/04/2016 au 13/04/2016"
},
"codage_etab": {
"dp": "A099",
"dp_libelle": "GASTROENTERITE COLITE ORIGINE SAI",
"dr": "C795 * 2",
"das": []
},
"codage_reco": {
"dp": "A099",
"dr": "C795 * 2",
"das": []
},
"actes_etab": [],
"actes_reco": [],
"ghm_etab": "06M032",
"ghs_etab": "2130",
"ghm_reco": "18M041",
"ghs_reco": "6772",
"recodage_impactant": "1",
"ghs_injustifie": "0",
"accord_desaccord": "désaccord",
"praticien_conseil": "DR JP VIGNAU",
"_validation": {
"summary": {
"valid": 8,
"invalid": 0,
"empty": 0,
"total_codes": 8,
"ghm_ghs_incoherents": 0
},
"codage_etab": {
"dp": {
"valid": true,
"libelle_ref": "Gastroentérite et colite d'origine non précisée"
},
"dr": {
"valid": true,
"libelle_ref": "Tumeur maligne secondaire des os et de la moelle osseuse"
},
"das": []
},
"codage_reco": {
"dp": {
"valid": true,
"libelle_ref": "Gastroentérite et colite d'origine non précisée"
},
"dr": {
"valid": true,
"libelle_ref": "Tumeur maligne secondaire des os et de la moelle osseuse"
},
"das": []
},
"ghm_etab": {
"valid": true
},
"ghs_etab": {
"valid": true
},
"ghm_reco": {
"valid": true
},
"ghs_reco": {
"valid": true
},
"cross_checks": {
"etab_ghm_ghs_coherent": true,
"reco_ghm_ghs_coherent": true
}
}
},
"concertation_2": {
"ghs_initial": "",
"ghs_avant_concertation": "",
"ghs_final": "",
"decision": "",
"date_concertation": "2-3",
"praticien_controleur": "",
"medecin_dim": "",
"_validation": {
"ghs_initial": {
"code": "",
"valid": null
},
"ghs_avant_concertation": {
"code": "",
"valid": null
},
"ghs_final": {
"code": "",
"valid": null
}
}
},
"concertation_1": {
"date_concertation": "2.3.18",
"argumentaire": "Atteste avoir pris connaissance des éléments du dossier y compris ceux couverts par le secret médical et des arguments soutenus par les médecins contrôleurs et avoir eu l'opportunité d'en débattre contradictoirement"
},
"preuves": {
"date": "2023/01/18",
"praticien_controleur": [
"Dr RADZIKOWSKI",
"Dr DELAYE-PHULPIN",
"Dr TURBAN",
"Dr DUVAL",
"Dr VIGNAU",
"Dr PROMAX"
],
"medecin_dim": "Dr ETTORCHI-TARDY",
"pieces": [
{
"intitule": "Compte-rendu d'acte : TDP TAP TRN",
"present": true,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu opératoire",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu d'accouchement",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu d'examen complémentaire :",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu d'imagerie :",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu d'anatomopathologie",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Observations médicales",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Dossier de transfusion",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Dossier d'anesthésie",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Administration thérapeutique",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Compte-rendu d'hospitalisation",
"present": true,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Lettre de sortie",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Eléments de surveillance du dossier infirmier",
"present": true,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Prise en charge psychologique",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Prise en charge kinésithérapeute",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Prise en charge diététique",
"present": false,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
},
{
"intitule": "Autre : b-c",
"present": true,
"photocopie": false,
"absent_date": "",
"date_obtention": ""
}
]
}
},
"_meta": {
"pipeline_version": "v1",
"ocr_model": "zai-org/GLM-OCR",
"generated_at": "2026-04-24T12:16:17+00:00"
}
}