feat(extraction): lecture de tableau structurée (grille bbox+confiance)
Nouvelle extract_grid_from_image() : reconstruit une grille List[List[cell]] (lignes ET colonnes par clustering des centres y/x des tokens EasyOCR), en conservant bbox + confiance + (row,col) par cellule. Contrairement à extract_table_from_image (liste plate, coordonnée x jetée) — laissé intact. Brique 1 de la verticale extraction dossier patient. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
79
tests/unit/test_extract_grid.py
Normal file
79
tests/unit/test_extract_grid.py
Normal file
@@ -0,0 +1,79 @@
"""Tests for extract_grid_from_image — STRUCTURED table reading.

Unlike extract_table_from_image (which discards x and returns a flat list
sorted by y), extract_grid_from_image rebuilds a real grid
List[List[cell]]: rows are clustered by y proximity, columns by x
proximity. bbox + confidence are kept for every cell.

OCR tokens are injected (the EasyOCR reader is mocked) → no real PNG,
no GPU.
"""
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import core.llm.ocr_extractor as ocr_extractor
|
||||
|
||||
|
||||
def _blank_png(path: Path) -> None:
    """Write a blank white 300x120 RGB image to ``path``.

    The tests in this module inject OCR tokens through a mocked reader, so
    the image content itself is never meant to be recognised; the file
    presumably only has to exist and be loadable (confirm against
    ``extract_grid_from_image``).

    Args:
        path: Destination file. Pillow picks the output format from the
            file extension.
    """
    Image.new("RGB", (300, 120), "white").save(path)
|
||||
|
||||
|
||||
def _bbox(x0: float, y0: float, x1: float, y1: float):
|
||||
"""bbox EasyOCR = 4 points [tl, tr, br, bl], chaque point (x, y)."""
|
||||
return [[x0, y0], [x1, y0], [x1, y1], [x0, y1]]
|
||||
|
||||
|
||||
def _fake_reader(tokens):
|
||||
"""Reader factice : readtext() renvoie la liste (bbox, text, conf) fournie."""
|
||||
return SimpleNamespace(readtext=lambda *a, **k: tokens)
|
||||
|
||||
|
||||
def test_extract_grid_2x3(tmp_path, monkeypatch):
    """Six shuffled tokens must come back as an ordered 2x3 grid.

    Checks the grid shape, the reading order of the texts, and that each
    cell keeps its bbox, confidence and (row, col) indices.
    """
    image_path = tmp_path / "table.png"
    _blank_png(image_path)

    # 2 rows (y≈10 and y≈60) × 3 columns (x≈10, x≈110, x≈210).
    # Deliberately shuffled relative to reading order to exercise sorting.
    tokens = [
        (_bbox(110, 58, 160, 78), "B2", 0.97),
        (_bbox(10, 10, 60, 30), "A1", 0.91),
        (_bbox(210, 12, 260, 32), "C1", 0.88),
        (_bbox(210, 60, 260, 80), "C2", 0.95),
        (_bbox(10, 60, 60, 80), "A2", 0.90),
        (_bbox(110, 8, 160, 28), "B1", 0.93),
    ]
    monkeypatch.setattr(ocr_extractor, "_get_reader", lambda: _fake_reader(tokens))

    grid = ocr_extractor.extract_grid_from_image(str(image_path))

    # Ordered 2×3 grid
    assert len(grid) == 2, "doit détecter 2 lignes"
    assert all(len(row) == 3 for row in grid), "chaque ligne doit avoir 3 colonnes"

    texts = [[cell["text"] for cell in row] for row in grid]
    assert texts == [["A1", "B1", "C1"], ["A2", "B2", "C2"]]

    # Metadata preserved + consistent row/col indices
    cell = grid[0][2]
    assert cell["text"] == "C1"
    assert cell["confidence"] == 0.88
    assert cell["bbox"] == _bbox(210, 12, 260, 32)
    assert cell["row"] == 0
    assert cell["col"] == 2

    # Checked separately so a failure reports which index is wrong.
    assert grid[1][0]["row"] == 1
    assert grid[1][0]["col"] == 0
|
||||
|
||||
|
||||
def test_extract_grid_empty_when_no_tokens(tmp_path, monkeypatch):
    """An existing image for which the reader finds no tokens gives ``[]``."""
    image_path = tmp_path / "blank.png"
    _blank_png(image_path)
    monkeypatch.setattr(ocr_extractor, "_get_reader", lambda: _fake_reader([]))

    grid = ocr_extractor.extract_grid_from_image(str(image_path))

    assert grid == []
|
||||
|
||||
|
||||
def test_extract_grid_missing_file_returns_empty():
    """A path that does not exist gives ``[]`` instead of raising."""
    grid = ocr_extractor.extract_grid_from_image("/no/such/file.png")

    assert grid == []
|
||||
Reference in New Issue
Block a user