fix: align model name to gemma3:27b-cloud + add architecture diagram & deployment zip
setup.sh and README.md referenced gemma3:27b-it-qat while config.py uses gemma3:27b-cloud; they are now aligned on gemma3:27b-cloud.

Added architecture.html (Mermaid pipeline diagram) and t2a-extractor.zip for collaborator deployment.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
115
architecture.html
Normal file
115
architecture.html
Normal file
@@ -0,0 +1,115 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fr">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>T2A Extractor — Architecture</title>
|
||||
<style>
|
||||
/* Page shell: light grey background, no outer margin, 2rem of inner padding. */
body {
|
||||
/* System UI font stack, falling back to the generic sans-serif family. */
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
background: #f5f5f5;
|
||||
/* Column flex layout: children are stacked vertically... */
display: flex;
|
||||
flex-direction: column;
|
||||
/* ...and centred horizontally (cross axis of a column flex container). */
align-items: center;
|
||||
padding: 2rem;
|
||||
margin: 0;
|
||||
}
|
||||
/* Title and subtitle: margins tightened so the subtitle sits close under the heading. */
h1 { color: #333; margin-bottom: 0.5rem; }
|
||||
p.subtitle { color: #666; margin-top: 0; }
|
||||
/* Card styling for the element with class "mermaid" (the diagram container). */
.mermaid {
|
||||
background: white;
|
||||
border-radius: 12px;
|
||||
padding: 2rem;
|
||||
box-shadow: 0 2px 12px rgba(0,0,0,0.1);
|
||||
/* Cap the card at 95% of the viewport width; scroll horizontally if the content is wider. */
max-width: 95vw;
|
||||
overflow-x: auto;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>T2A Extractor</h1>
|
||||
<p class="subtitle">Pipeline d'extraction structurée de rapports UCR</p>
|
||||
<div class="mermaid">
|
||||
flowchart TD
|
||||
%% Input: the UCR PDF; its label says it can contain native and scanned pages.
subgraph INPUT["📄 Entrée"]
|
||||
PDF["PDF UCR<br/>natif + scanné"]
|
||||
end
|
||||
|
||||
%% Stage 1 - text extraction: a decision node, a PyMuPDF node (native text), a docTR node (OCR) and the merged raw text.
subgraph ETAPE1["📖 Étape 1 — Extraction texte"]
|
||||
DETECT{"Page native<br/>ou scannée ?"}
|
||||
PYMUPDF["<b>PyMuPDF</b><br/>texte natif"]
|
||||
DOCTR["<b>docTR + Torch</b><br/>OCR"]
|
||||
MERGE(["Texte brut complet"])
|
||||
end
|
||||
|
||||
%% Stage 2 - segmentation: a regex step plus the two kinds of blocks it yields (OGC blocks and Champ blocks).
subgraph ETAPE2["✂️ Étape 2 — Segmentation"]
|
||||
REGEX["Regex<br/>par Champ + OGC"]
|
||||
OGC_BLOCKS["Blocs OGC<br/>individuels / groupés"]
|
||||
CHAMP_BLOCKS["Blocs Champ<br/>décisions globales"]
|
||||
end
|
||||
|
||||
%% Stage 3 - structured extraction: Ollama (labelled with model gemma3:27b-cloud) and the structured JSON node.
subgraph ETAPE3["🤖 Étape 3 — Extraction structurée"]
|
||||
OLLAMA["<b>Ollama</b><br/>gemma3:27b-cloud"]
|
||||
JSON["JSON structuré<br/>11 champs par OGC"]
|
||||
end
|
||||
|
||||
%% Stage 3.5 - normalisation: CIM-10 code correction, codes_retenus auto-fill, regex fallback for texte_decision.
subgraph ETAPE35["🔧 Étape 3.5 — Normalisation"]
|
||||
CIM10["Correction codes CIM-10<br/>&bull; OCR chiffre ↔ lettre<br/>&bull; Point manquant / mal placé<br/>&bull; Décimales excédentaires"]
|
||||
RETENUS["Auto-remplissage<br/>codes_retenus"]
|
||||
TEXTE["Fallback regex<br/>texte_decision"]
|
||||
end
|
||||
|
||||
%% Stage 4 - validation: format checks (CIM-10 / CCAM), a second normaliser pass, decision/code coherence.
subgraph ETAPE4["✅ Étape 4 — Validation"]
|
||||
VALID["Vérification formats<br/>CIM-10 / CCAM"]
|
||||
SAFETY["Safety-net<br/>2e passe normalizer"]
|
||||
COHERENCE["Cohérence<br/>décision ↔ codes"]
|
||||
end
|
||||
|
||||
%% Stage 5 - export: an Excel .xlsx output and a CSV output labelled as optional.
subgraph ETAPE5["📊 Étape 5 — Export"]
|
||||
EXCEL["<b>Excel</b> .xlsx<br/>coloration décisions"]
|
||||
CSV["<b>CSV</b><br/>optionnel"]
|
||||
end
|
||||
|
||||
%% Stage 1 routing: the decision node sends a page to PyMuPDF or to docTR depending on the 50-character threshold shown on the edge labels.
PDF --> DETECT
|
||||
DETECT -->|"≥ 50 chars"| PYMUPDF
|
||||
%% The less-than sign is written as the HTML entity &lt; because this diagram is embedded in HTML text content, where a raw one is a parse error.
DETECT -->|"&lt; 50 chars"| DOCTR
|
||||
PYMUPDF --> MERGE
|
||||
DOCTR --> MERGE
|
||||
|
||||
%% Stage 2: the merged text goes through the regex step, which fans out into OGC blocks and Champ blocks.
MERGE --> REGEX
|
||||
REGEX --> OGC_BLOCKS
|
||||
REGEX --> CHAMP_BLOCKS
|
||||
|
||||
%% Stage 3: both kinds of blocks are sent to Ollama, which yields the structured JSON.
OGC_BLOCKS --> OLLAMA
|
||||
CHAMP_BLOCKS --> OLLAMA
|
||||
OLLAMA --> JSON
|
||||
|
||||
%% Stage 3.5: the normalisation steps are chained one after another.
JSON --> CIM10
|
||||
CIM10 --> RETENUS
|
||||
RETENUS --> TEXTE
|
||||
|
||||
%% Stage 4: the validation steps are chained one after another.
TEXTE --> VALID
|
||||
VALID --> SAFETY
|
||||
SAFETY --> COHERENCE
|
||||
|
||||
%% Stage 5: the validated result feeds both export formats.
COHERENCE --> EXCEL
|
||||
COHERENCE --> CSV
|
||||
|
||||
%% Per-stage subgraph colours: each subgraph gets its own fill and stroke colour, a 2px border, and black label text.
style INPUT fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
|
||||
style ETAPE1 fill:#fff3e0,stroke:#FF9800,stroke-width:2px,color:#000
|
||||
style ETAPE2 fill:#f3e5f5,stroke:#9C27B0,stroke-width:2px,color:#000
|
||||
style ETAPE3 fill:#e8f5e9,stroke:#4CAF50,stroke-width:2px,color:#000
|
||||
style ETAPE35 fill:#fce4ec,stroke:#E91E63,stroke-width:2px,color:#000
|
||||
style ETAPE4 fill:#fff8e1,stroke:#FFC107,stroke-width:2px,color:#000
|
||||
style ETAPE5 fill:#e0f2f1,stroke:#009688,stroke-width:2px,color:#000
|
||||
</div>
|
||||
<script src="https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js"></script>
|
||||
<script>
|
||||
// Configure the Mermaid library (loaded from the jsDelivr CDN by the preceding script tag).
mermaid.initialize({
|
||||
// Let Mermaid render the diagram automatically once the page has loaded.
startOnLoad: true,
|
||||
theme: 'default',
|
||||
// htmlLabels is enabled because the diagram's node labels contain HTML (<b>, <br/>).
flowchart: { useMaxWidth: true, htmlLabels: true, curve: 'basis' },
|
||||
// NOTE(review): per the Mermaid docs, 'loose' is the most permissive security level.
// The diagram on this page is static and defines no click callbacks, so a stricter
// level may be sufficient - confirm the HTML labels still render before changing it.
securityLevel: 'loose'
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user