fix: align model name to gemma3:27b-cloud + add architecture diagram & deployment zip

setup.sh and README.md referenced gemma3:27b-it-qat while config.py uses
gemma3:27b-cloud; both files now reference gemma3:27b-cloud. Added
architecture.html (Mermaid pipeline diagram) and t2a-extractor.zip for
collaborator deployment.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-23 21:31:57 +01:00
parent f70d138db3
commit 751922d032
4 changed files with 119 additions and 4 deletions

115
architecture.html Normal file
View File

@@ -0,0 +1,115 @@
<!DOCTYPE html>
<html lang="fr">
<head>
  <meta charset="UTF-8">
  <!-- Lay the page out at the device width. Without a viewport declaration,
       mobile browsers render on a wide virtual viewport and scale the page
       down, which defeats the vw-based sizing used by the diagram card. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>T2A Extractor — Architecture</title>
  <style>
    /* Page shell: a single centered column (title, subtitle, diagram card)
       on a light grey background. */
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      background: #f5f5f5;
      display: flex;
      flex-direction: column;
      align-items: center;
      padding: 2rem;
      margin: 0;
    }

    /* Title and subtitle sit tight together: the heading's bottom margin is
       reduced and the subtitle's top margin is removed. */
    h1 { color: #333; margin-bottom: 0.5rem; }
    p.subtitle { color: #666; margin-top: 0; }

    /* White card around the element carrying class "mermaid" (the diagram
       container). Its width is capped relative to the viewport, and content
       wider than the card scrolls horizontally inside it. */
    .mermaid {
      background: white;
      border-radius: 12px;
      padding: 2rem;
      box-shadow: 0 2px 12px rgba(0,0,0,0.1);
      max-width: 95vw;
      overflow-x: auto;
    }
  </style>
</head>
<body>
<h1>T2A Extractor</h1>
<p class="subtitle">Pipeline d'extraction structur&eacute;e de rapports UCR</p>
<!--
  Pipeline diagram source: a Mermaid "flowchart TD" (top-down) definition held
  as the text content of the div below. The init script at the end of the body
  configures Mermaid with startOnLoad: true and htmlLabels: true.

  Stages, one subgraph each (labels are in French and are user-facing text):
    INPUT    PDF input, native and scanned.
    ETAPE1   Text extraction. DETECT branches on a 50 character threshold:
             at least 50 chars goes to PyMuPDF (native text), fewer than 50
             goes to docTR (OCR). Both paths join in MERGE (full raw text).
    ETAPE2   Regex segmentation into OGC blocks and Champ blocks.
    ETAPE3   Both block kinds are sent to Ollama (gemma3:27b-cloud), which
             feeds the JSON node (11 fields per OGC).
    ETAPE35  Normalisation chain: CIM10, then RETENUS, then TEXTE.
    ETAPE4   Validation chain: VALID, then SAFETY, then COHERENCE.
    ETAPE5   Export: COHERENCE feeds both EXCEL and CSV.
  The trailing "style" lines only set fill, stroke and text colour per subgraph.

  Label markup is written HTML-escaped on purpose (for example &lt;br/&gt; and
  &amp;bull;): the HTML parser must hand the tag and entity text to Mermaid as
  diagram source instead of interpreting it as page markup.
  NOTE(review): anything placed inside the div is presumably treated as diagram
  source, so annotate inside it with Mermaid "%%" comment lines rather than
  HTML comments. Confirm against the Mermaid syntax reference.
-->
<div class="mermaid">
flowchart TD
subgraph INPUT["&#x1F4C4; Entr&eacute;e"]
PDF["PDF UCR&lt;br/&gt;natif + scann&eacute;"]
end
subgraph ETAPE1["&#x1F4D6; &Eacute;tape 1 &mdash; Extraction texte"]
DETECT{"Page native&lt;br/&gt;ou scann&eacute;e ?"}
PYMUPDF["&lt;b&gt;PyMuPDF&lt;/b&gt;&lt;br/&gt;texte natif"]
DOCTR["&lt;b&gt;docTR + Torch&lt;/b&gt;&lt;br/&gt;OCR"]
MERGE(["Texte brut complet"])
end
subgraph ETAPE2["&#x2702;&#xFE0F; &Eacute;tape 2 &mdash; Segmentation"]
REGEX["Regex&lt;br/&gt;par Champ + OGC"]
OGC_BLOCKS["Blocs OGC&lt;br/&gt;individuels / group&eacute;s"]
CHAMP_BLOCKS["Blocs Champ&lt;br/&gt;d&eacute;cisions globales"]
end
subgraph ETAPE3["&#x1F916; &Eacute;tape 3 &mdash; Extraction structur&eacute;e"]
OLLAMA["&lt;b&gt;Ollama&lt;/b&gt;&lt;br/&gt;gemma3:27b-cloud"]
JSON["JSON structur&eacute;&lt;br/&gt;11 champs par OGC"]
end
subgraph ETAPE35["&#x1F527; &Eacute;tape 3.5 &mdash; Normalisation"]
CIM10["Correction codes CIM-10&lt;br/&gt;&amp;bull; OCR chiffre &harr; lettre&lt;br/&gt;&amp;bull; Point manquant / mal plac&eacute;&lt;br/&gt;&amp;bull; D&eacute;cimales exc&eacute;dentaires"]
RETENUS["Auto-remplissage&lt;br/&gt;codes_retenus"]
TEXTE["Fallback regex&lt;br/&gt;texte_decision"]
end
subgraph ETAPE4["&#x2705; &Eacute;tape 4 &mdash; Validation"]
VALID["V&eacute;rification formats&lt;br/&gt;CIM-10 / CCAM"]
SAFETY["Safety-net&lt;br/&gt;2e passe normalizer"]
COHERENCE["Coh&eacute;rence&lt;br/&gt;d&eacute;cision &harr; codes"]
end
subgraph ETAPE5["&#x1F4CA; &Eacute;tape 5 &mdash; Export"]
EXCEL["&lt;b&gt;Excel&lt;/b&gt; .xlsx&lt;br/&gt;coloration d&eacute;cisions"]
CSV["&lt;b&gt;CSV&lt;/b&gt;&lt;br/&gt;optionnel"]
end
PDF --> DETECT
DETECT -->|"&ge; 50 chars"| PYMUPDF
DETECT -->|"&lt; 50 chars"| DOCTR
PYMUPDF --> MERGE
DOCTR --> MERGE
MERGE --> REGEX
REGEX --> OGC_BLOCKS
REGEX --> CHAMP_BLOCKS
OGC_BLOCKS --> OLLAMA
CHAMP_BLOCKS --> OLLAMA
OLLAMA --> JSON
JSON --> CIM10
CIM10 --> RETENUS
RETENUS --> TEXTE
TEXTE --> VALID
VALID --> SAFETY
SAFETY --> COHERENCE
COHERENCE --> EXCEL
COHERENCE --> CSV
style INPUT fill:#e8f4fd,stroke:#2196F3,stroke-width:2px,color:#000
style ETAPE1 fill:#fff3e0,stroke:#FF9800,stroke-width:2px,color:#000
style ETAPE2 fill:#f3e5f5,stroke:#9C27B0,stroke-width:2px,color:#000
style ETAPE3 fill:#e8f5e9,stroke:#4CAF50,stroke-width:2px,color:#000
style ETAPE35 fill:#fce4ec,stroke:#E91E63,stroke-width:2px,color:#000
style ETAPE4 fill:#fff8e1,stroke:#FFC107,stroke-width:2px,color:#000
style ETAPE5 fill:#e0f2f1,stroke:#009688,stroke-width:2px,color:#000
</div>
<!-- Mermaid runtime from the jsDelivr CDN (major version 11), followed by the
     one-time configuration call. The inline script relies on the global
     "mermaid" object defined by the CDN bundle. -->
<script src="https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js"></script>
<script>
  (function configureMermaid() {
    // Flowchart rendering options: scale to the container width, render
    // labels as HTML, and draw edges with the 'basis' curve.
    var flowchartOptions = { useMaxWidth: true, htmlLabels: true, curve: 'basis' };

    mermaid.initialize({
      // NOTE(review): 'loose' is presumably required so that the <b> and
      // <br/> markup in node labels is rendered rather than escaped;
      // confirm against the Mermaid securityLevel documentation.
      securityLevel: 'loose',
      flowchart: flowchartOptions,
      theme: 'default',
      // Ask Mermaid to render diagrams automatically once the page has loaded.
      startOnLoad: true
    });
  })();
</script>
</body>
</html>