feat(schema): module de nettoyage des JSONs pour consommation aval
Le pipeline produit un JSON riche pendant l'exécution (ratios
checkbox, OCR raw, flags _parse_error/_truncated_loop/_crop_recodage,
_source, _elapsed_s…). Ces champs sont utiles en audit, mais ils polluent la
sortie quand on veut exposer le résultat à un consommateur aval (Excel, dashboard, API).
pipeline/schema.py :
- SCHEMA_VERSION "2.0"
- clean_dossier(raw) : retourne une copie propre avec structure stable
(en-tête → codage → GHM/GHS → décisions) et validation ATIH en
format compact (summary + cross_checks + flags par champ).
- CLEAN_FIELDS_RECUEIL / CLEAN_FIELDS_CONCERTATION_{1,2} / CLEAN_FIELDS_PREUVES
documentent les champs stables par type de page.
- CLI : `python -m pipeline.schema` → nettoie `output/v2/*.json` vers
`output/v2_clean/`.
Séparation claire : `output/v2/` reste le JSON brut (audit), `output/v2_clean/`
est la sortie propre et stable pour les livrables.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
188
output/v2_clean/OGC 20.json
Normal file
188
output/v2_clean/OGC 20.json
Normal file
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"fichier": "OGC 20",
|
||||
"pdf_hash": "eb280d07819ff75d",
|
||||
"schema_version": "2.0",
|
||||
"extraction": {
|
||||
"recueil": {
|
||||
"etablissement": "CLINIQUE D'ARCACHON",
|
||||
"finess": "330780206",
|
||||
"date_debut_controle": "13/02/2018",
|
||||
"n_ogc": "20",
|
||||
"n_champ": "1",
|
||||
"dates_sejour": "09/03/2016 au 18/03/2016",
|
||||
"sejour_etab": {
|
||||
"age": "57",
|
||||
"sexe": "1",
|
||||
"duree_sejour": "9",
|
||||
"mode_entree": "8",
|
||||
"provenance": "8",
|
||||
"mode_sortie": "8",
|
||||
"destination": ""
|
||||
},
|
||||
"sejour_reco": {
|
||||
"age": "57",
|
||||
"sexe": "1",
|
||||
"duree_sejour": "9",
|
||||
"mode_entree": "8",
|
||||
"provenance": "8",
|
||||
"mode_sortie": "8",
|
||||
"destination": ""
|
||||
},
|
||||
"rum_etab": {
|
||||
"um": "0",
|
||||
"igs": "53 C",
|
||||
"duree": "9",
|
||||
"dates": "du 09/03/2016 au 18/03/2016"
|
||||
},
|
||||
"codage_etab": {
|
||||
"dp": "D374",
|
||||
"dr": "I022",
|
||||
"das": [
|
||||
{
|
||||
"code": "T814",
|
||||
"position": "3",
|
||||
"libelle": "INFECT. APRES UN ACTE, NCA"
|
||||
}
|
||||
]
|
||||
},
|
||||
"codage_reco": {
|
||||
"dp": "D374",
|
||||
"dr": "I022",
|
||||
"das": [
|
||||
{
|
||||
"code": "T814",
|
||||
"position": "3",
|
||||
"libelle": "INFECT. APRES UN ACTE, NCA"
|
||||
}
|
||||
]
|
||||
},
|
||||
"actes_etab": [],
|
||||
"actes_reco": [],
|
||||
"ghm_etab": "06C043",
|
||||
"ghs_etab": "1941",
|
||||
"ghm_reco": "06C042",
|
||||
"ghs_reco": "1940",
|
||||
"recodage_impactant": "1",
|
||||
"ghs_injustifie": "0",
|
||||
"accord_desaccord": "désaccord",
|
||||
"praticien_conseil": "DR VIGNAÚ",
|
||||
"_validation": {
|
||||
"summary": {
|
||||
"valid": 8,
|
||||
"invalid": 2,
|
||||
"empty": 0,
|
||||
"total_codes": 10,
|
||||
"ghm_ghs_incoherents": 0
|
||||
},
|
||||
"codage_etab": {
|
||||
"dp": {
|
||||
"valid": true,
|
||||
"libelle_ref": "Côlon"
|
||||
},
|
||||
"dr": {
|
||||
"valid": false,
|
||||
"suggestion": "A022"
|
||||
},
|
||||
"das": [
|
||||
{
|
||||
"valid": true,
|
||||
"libelle_ref": "Infection après un acte à visée diagnostique et thérapeutique, non classée ailleurs"
|
||||
}
|
||||
]
|
||||
},
|
||||
"codage_reco": {
|
||||
"dp": {
|
||||
"valid": true,
|
||||
"libelle_ref": "Côlon"
|
||||
},
|
||||
"dr": {
|
||||
"valid": false,
|
||||
"suggestion": "A022"
|
||||
},
|
||||
"das": [
|
||||
{
|
||||
"valid": true,
|
||||
"libelle_ref": "Infection après un acte à visée diagnostique et thérapeutique, non classée ailleurs"
|
||||
}
|
||||
]
|
||||
},
|
||||
"ghm_etab": {
|
||||
"valid": true
|
||||
},
|
||||
"ghs_etab": {
|
||||
"valid": true
|
||||
},
|
||||
"ghm_reco": {
|
||||
"valid": true
|
||||
},
|
||||
"ghs_reco": {
|
||||
"valid": true
|
||||
},
|
||||
"cross_checks": {
|
||||
"etab_ghm_ghs_coherent": true,
|
||||
"reco_ghm_ghs_coherent": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"concertation_2": {
|
||||
"ghs_initial": "",
|
||||
"ghs_avant_concertation": "",
|
||||
"ghs_final": "",
|
||||
"decision": "",
|
||||
"date_concertation": "2.3.18",
|
||||
"praticien_controleur": "",
|
||||
"medecin_dim": "",
|
||||
"_validation": {
|
||||
"ghs_initial": {
|
||||
"code": "",
|
||||
"valid": null
|
||||
},
|
||||
"ghs_avant_concertation": {
|
||||
"code": "",
|
||||
"valid": null
|
||||
},
|
||||
"ghs_final": {
|
||||
"code": "",
|
||||
"valid": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"concertation_1": {
|
||||
"date_concertation": "2.3.18",
|
||||
"argumentaire": "Désaccord : (les éléments couverts par le secret médical sont à mentionner sur la fiche médicale de concertation)"
|
||||
},
|
||||
"preuves": {
|
||||
"date": "",
|
||||
"praticien_controleur": "",
|
||||
"medecin_dim": "",
|
||||
"pieces": [
|
||||
{
|
||||
"intitule": "Patient de 57 ans, puis en charge pour une colo-rectalgie et l'anapath. Confirme le Kc du colon.",
|
||||
"present": true,
|
||||
"photocopie": false,
|
||||
"absent_date": "",
|
||||
"date_obtention": ""
|
||||
},
|
||||
{
|
||||
"intitule": "En post-op, présence d'un escoulement au niveau du bas de la muqueuse ; retrait d'un aquafili et luchage.",
|
||||
"present": true,
|
||||
"photocopie": false,
|
||||
"absent_date": "",
|
||||
"date_obtention": ""
|
||||
},
|
||||
{
|
||||
"intitule": "Il n'a pas d'abcès : une déficatga. Des JAS en T81.8. Pas de moton. Disphagie restaurée, pas de prétendu ad. de l'escoulement.",
|
||||
"present": true,
|
||||
"photocopie": false,
|
||||
"absent_date": "",
|
||||
"date_obtention": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"_meta": {
|
||||
"pipeline_version": "v1",
|
||||
"ocr_model": "zai-org/GLM-OCR",
|
||||
"generated_at": "2026-04-24T12:11:05+00:00"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user