feat(vwb): add dashboard competence testing and health tools
This commit is contained in:
62
tests/unit/test_ocr_extractor_tesseract.py
Normal file
62
tests/unit/test_ocr_extractor_tesseract.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import core.llm.ocr_extractor as ocr_extractor
|
||||
|
||||
|
||||
def _blank_png(path: Path) -> None:
    """Save a white 120x40 RGB image at *path* to serve as an OCR input fixture."""
    canvas = Image.new("RGB", (120, 40), "white")
    canvas.save(path)
|
||||
|
||||
|
||||
def test_extract_digits_tesseract_filters_numeric_pattern(tmp_path, monkeypatch):
    """Check that only OCR tokens matching ``pattern`` are returned.

    A stub ``pytesseract`` module is registered in ``sys.modules``; its
    ``image_to_string`` verifies the language and whitelist arguments it
    receives and returns canned text mixing matching and non-matching tokens.
    """
    png_file = tmp_path / "screen.png"
    _blank_png(png_file)

    def fake_image_to_string(_img, lang, config):
        # The call must request English and a digits-only character whitelist.
        assert lang == "eng"
        assert "tessedit_char_whitelist=0123456789" in config
        return "IPP 25003284 MOREL\n25003362 abc 1234\n25012257"

    # Presumably the extractor imports pytesseract at call time, so the stub
    # placed in sys.modules is what it picks up -- verify against the module.
    stub_module = SimpleNamespace(image_to_string=fake_image_to_string)
    monkeypatch.setitem(sys.modules, "pytesseract", stub_module)

    extracted = ocr_extractor.extract_digits_tesseract_from_image(
        str(png_file),
        pattern=r"^25\d{6}$",
    )

    # "IPP", "MOREL", "abc" and "1234" do not match the pattern and are dropped.
    assert extracted == ["25003284", "25003362", "25012257"]
|
||||
|
||||
|
||||
def test_extract_table_tesseract_engine_delegates_to_digits(tmp_path, monkeypatch):
    """Check that ``engine="tesseract"`` routes to the digits extractor.

    ``extract_digits_tesseract_from_image`` is replaced by a recorder; the test
    asserts that ``extract_table_from_image`` returns the recorder's result and
    that the path, region, pattern and limit were forwarded unchanged.
    """
    png_file = tmp_path / "screen.png"
    _blank_png(png_file)

    roi = (10, 20, 30, 40)
    id_pattern = r"^25\d{6}$"
    captured = {}

    def fake_extract_digits(image_path_arg, region=None, pattern=None, limit=None):
        # Remember the arguments of the most recent call for the assertion below.
        captured["args"] = (image_path_arg, region, pattern, limit)
        return ["25003284", "25003362"]

    monkeypatch.setattr(
        ocr_extractor,
        "extract_digits_tesseract_from_image",
        fake_extract_digits,
    )

    result = ocr_extractor.extract_table_from_image(
        str(png_file),
        region=roi,
        pattern=id_pattern,
        limit=2,
        engine="tesseract",
    )

    assert result == ["25003284", "25003362"]
    assert captured["args"] == (str(png_file), roi, id_pattern, 2)
|
||||
Reference in New Issue
Block a user