Le pipeline produit un JSON riche pendant l'exécution (ratios
checkbox, OCR raw, flags _parse_error/_truncated_loop/_crop_recodage,
_source, _elapsed_s…). Ces détails sont utiles en audit, mais ils polluent
la sortie dès qu'on veut l'exposer à un consommateur aval (Excel, dashboard, API).
pipeline/schema.py :
- SCHEMA_VERSION "2.0"
- clean_dossier(raw) : retourne une copie propre avec une structure stable
(en-tête → codage → GHM/GHS → décisions) et une validation ATIH au
format compact (summary + cross_checks + flags par champ).
- CLEAN_FIELDS_RECUEIL / CLEAN_FIELDS_CONCERTATION_{1,2} / CLEAN_FIELDS_PREUVES
documentent les champs stables par type de page.
- CLI : `python -m pipeline.schema` → nettoie `output/v2/*.json` vers
`output/v2_clean/`.
Séparation claire : `output/v2/` reste le JSON brut (raw, pour l'audit), `output/v2_clean/`
est la sortie propre et stable pour les livrables.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
272 lines
7.1 KiB
JSON
272 lines
7.1 KiB
JSON
{
|
||
"fichier": "OGC 74",
|
||
"pdf_hash": "58076293464e9771",
|
||
"schema_version": "2.0",
|
||
"extraction": {
|
||
"recueil": {
|
||
"etablissement": "CLINIQUE D'ARCACHON",
|
||
"finess": "330780206",
|
||
"date_debut_controle": "13/02/2018",
|
||
"n_ogc": "74",
|
||
"n_champ": "1",
|
||
"dates_sejour": "08/04/2016 au 13/04/2016",
|
||
"sejour_etab": {
|
||
"age": "52",
|
||
"sexe": "1",
|
||
"duree_sejour": "5",
|
||
"mode_entree": "1",
|
||
"provenance": "8",
|
||
"mode_sortie": "1",
|
||
"destination": "8"
|
||
},
|
||
"sejour_reco": {
|
||
"age": "52",
|
||
"sexe": "1",
|
||
"duree_sejour": "5",
|
||
"mode_entree": "1",
|
||
"provenance": "8",
|
||
"mode_sortie": "1",
|
||
"destination": "8"
|
||
},
|
||
"rum_etab": {
|
||
"um": "40 C",
|
||
"igs": "II",
|
||
"duree": "5",
|
||
"dates": "du 08/04/2016 au 13/04/2016"
|
||
},
|
||
"codage_etab": {
|
||
"dp": "A099",
|
||
"dp_libelle": "GASTROENTERITE COLITE ORIGINE SAI",
|
||
"dr": "C795 * 2",
|
||
"das": []
|
||
},
|
||
"codage_reco": {
|
||
"dp": "A099",
|
||
"dr": "C795 * 2",
|
||
"das": []
|
||
},
|
||
"actes_etab": [],
|
||
"actes_reco": [],
|
||
"ghm_etab": "06M032",
|
||
"ghs_etab": "2130",
|
||
"ghm_reco": "18M041",
|
||
"ghs_reco": "6772",
|
||
"recodage_impactant": "1",
|
||
"ghs_injustifie": "0",
|
||
"accord_desaccord": "désaccord",
|
||
"praticien_conseil": "DR JP VIGNAU",
|
||
"_validation": {
|
||
"summary": {
|
||
"valid": 8,
|
||
"invalid": 0,
|
||
"empty": 0,
|
||
"total_codes": 8,
|
||
"ghm_ghs_incoherents": 0
|
||
},
|
||
"codage_etab": {
|
||
"dp": {
|
||
"valid": true,
|
||
"libelle_ref": "Gastroentérite et colite d’origine non précisée"
|
||
},
|
||
"dr": {
|
||
"valid": true,
|
||
"libelle_ref": "Tumeur maligne secondaire des os et de la moelle osseuse"
|
||
},
|
||
"das": []
|
||
},
|
||
"codage_reco": {
|
||
"dp": {
|
||
"valid": true,
|
||
"libelle_ref": "Gastroentérite et colite d’origine non précisée"
|
||
},
|
||
"dr": {
|
||
"valid": true,
|
||
"libelle_ref": "Tumeur maligne secondaire des os et de la moelle osseuse"
|
||
},
|
||
"das": []
|
||
},
|
||
"ghm_etab": {
|
||
"valid": true
|
||
},
|
||
"ghs_etab": {
|
||
"valid": true
|
||
},
|
||
"ghm_reco": {
|
||
"valid": true
|
||
},
|
||
"ghs_reco": {
|
||
"valid": true
|
||
},
|
||
"cross_checks": {
|
||
"etab_ghm_ghs_coherent": true,
|
||
"reco_ghm_ghs_coherent": true
|
||
}
|
||
}
|
||
},
|
||
"concertation_2": {
|
||
"ghs_initial": "",
|
||
"ghs_avant_concertation": "",
|
||
"ghs_final": "",
|
||
"decision": "",
|
||
"date_concertation": "2-3",
|
||
"praticien_controleur": "",
|
||
"medecin_dim": "",
|
||
"_validation": {
|
||
"ghs_initial": {
|
||
"code": "",
|
||
"valid": null
|
||
},
|
||
"ghs_avant_concertation": {
|
||
"code": "",
|
||
"valid": null
|
||
},
|
||
"ghs_final": {
|
||
"code": "",
|
||
"valid": null
|
||
}
|
||
}
|
||
},
|
||
"concertation_1": {
|
||
"date_concertation": "2.3.18",
|
||
"argumentaire": "Atteste avoir pris connaissance des éléments du dossier y compris ceux couverts par le secret médical et des arguments soutenus par les médecins contrôleurs et avoir eu l'opportunité d'en débattre contradictoirement"
|
||
},
|
||
"preuves": {
|
||
"date": "2023/01/18",
|
||
"praticien_controleur": [
|
||
"Dr RADZIKOWSKI",
|
||
"Dr DELAYE-PHULPIN",
|
||
"Dr TURBAN",
|
||
"Dr DUVAL",
|
||
"Dr VIGNAU",
|
||
"Dr PROMAX"
|
||
],
|
||
"medecin_dim": "Dr ETTORCHI-TARDY",
|
||
"pieces": [
|
||
{
|
||
"intitule": "Compte-rendu d'acte : TDP TAP TRN",
|
||
"present": true,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu opératoire",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu d'accouchement",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu d'examen complémentaire :",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu d'imagerie :",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu d'anatomopathologie",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Observations médicales",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Dossier de transfusion",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Dossier d'anesthésie",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Administration thérapeutique",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Compte-rendu d'hospitalisation",
|
||
"present": true,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Lettre de sortie",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Eléments de surveillance du dossier infirmier",
|
||
"present": true,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Prise en charge psychologique",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Prise en charge kinésithérapeute",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Prise en charge diététique",
|
||
"present": false,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
},
|
||
{
|
||
"intitule": "Autre : b-c",
|
||
"present": true,
|
||
"photocopie": false,
|
||
"absent_date": "",
|
||
"date_obtention": ""
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"_meta": {
|
||
"pipeline_version": "v1",
|
||
"ocr_model": "zai-org/GLM-OCR",
|
||
"generated_at": "2026-04-24T12:16:17+00:00"
|
||
}
|
||
} |