LogShipperHandler + LogShipper : buffer borne, flush par batch <= max, resilience 0-perte (rejeu sur echec), sender injectable. Flag RPA_LOG_SHIP_ENABLED (defaut off, activable par config.txt sans rebuild). Sanitizer client = identite (rempart PII = serveur, cf commit precedent). Wiring gated dans main.py. 8 tests TDD. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
221 lines
7.5 KiB
Python
221 lines
7.5 KiB
Python
"""TDD — push-log-DGX : log shipper client Léa (remontée auto des logs).
|
|
|
|
The server already exposes `POST /api/v1/agents/logs` (body
`{machine_id, logs:[{ts, level, logger, message}]}`, bounded by
`RPA_AGENT_LOGS_MAX_BATCH`). On the client side, we want:

- `LogShipperHandler(logging.Handler)`: on `emit`, formats a LogRecord
  to the exact schema `{ts, level, logger, message}`, applies PII
  sanitization to the message, and pushes it onto a buffer.
- `LogShipper`: periodic flush of the buffer in BATCHES (≤ max_batch) through
  an INJECTABLE `sender` callable `(machine_id, logs) -> bool`. Resilience:
  if `sender` returns False or raises, the logs STAY (replayed on the next
  flush — ZERO loss; AI Act Art. 12 compliance).

The module is loaded by path (importlib) so that it does not depend on any
heavy import from the client package (cf. DETTE-011/013, like
test_agent_v1_logging.py).
|
|
"""
|
|
import importlib.util
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Absolute location of the module under test: start two levels above the
# directory that contains this file, then descend into the client package.
_MOD_PATH = Path(__file__).resolve().parents[2].joinpath(
    "agent_v0", "agent_v1", "network", "log_shipper.py"
)
|
|
|
|
|
|
def _load_module():
    """Execute the file at ``_MOD_PATH`` as a module and return it.

    The module is built under the name ``lea_log_shipper`` directly from its
    file path. This function does not insert it into ``sys.modules``, so each
    call produces a new module object.

    Returns:
        The freshly executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for
            ``_MOD_PATH``.
    """
    spec = importlib.util.spec_from_file_location("lea_log_shipper", _MOD_PATH)
    # spec_from_file_location() may return None, and spec.loader may be None;
    # report the offending path instead of failing later with AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {_MOD_PATH}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
@pytest.fixture
def mod():
    """Fixture handing each test the module returned by ``_load_module()``."""
    loaded = _load_module()
    return loaded
|
|
|
|
|
|
def _make_record(name="lea.test", level=logging.INFO, msg="hello %s", args=("world",)):
    """Build a genuine ``logging.LogRecord`` (not a mock) to exercise formatting."""
    # Positional order: name, level, pathname, lineno, msg, args, exc_info.
    return logging.LogRecord(name, level, __file__, 1, msg, args, None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. emit formate un LogRecord au schéma exact {ts, level, logger, message}
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_emit_formate_au_schema_exact(mod):
    """``emit`` buffers one entry with exactly the keys ts, level, logger, message."""

    def accept_all(_mid, _batch):
        return True

    shipper = mod.LogShipper(machine_id="poste-1", sender=accept_all)
    emit = shipper.handler.emit

    record = _make_record(
        name="lea.captor",
        level=logging.WARNING,
        msg="bonjour %s",
        args=("monde",),
    )
    emit(record)

    pending = shipper.peek_buffer()
    assert len(pending) == 1

    entry = pending[0]
    # EXACT schema: no extra key, no missing key.
    assert {"ts", "level", "logger", "message"} == set(entry.keys())
    assert entry["level"] == "WARNING"
    assert entry["logger"] == "lea.captor"
    # The %-style args must already be interpolated into the message.
    assert entry["message"] == "bonjour monde"
    assert isinstance(entry["ts"], (int, float))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2. log_safe / assainissement PII appliqué au message avant envoi
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_pii_assaini_avant_envoi(mod):
    """The injected sanitizer is applied to the message before it is buffered."""

    # Deterministic injected sanitizer: PII -> token (mimics anonymize_text).
    def fake_sanitizer(text):
        return text.replace("ROSSIGNOL", "[NOM_1]")

    def accept_all(_mid, _batch):
        return True

    shipper = mod.LogShipper(
        machine_id="poste-1",
        sender=accept_all,
        message_sanitizer=fake_sanitizer,
    )
    record = _make_record(msg="clic sur patient ROSSIGNOL", args=None)
    shipper.handler.emit(record)

    first_entry = shipper.peek_buffer()[0]
    # The raw name must be gone and its replacement token must be present.
    assert "ROSSIGNOL" not in first_entry["message"]
    assert "[NOM_1]" in first_entry["message"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3. flush envoie un batch <= max et appelle sender(machine_id, logs)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_flush_envoie_batch_borne_et_appelle_sender(mod):
    """Five entries under ``max_batch=10`` go out in one ``sender(machine_id, logs)`` call."""
    recorded = []

    def sender(machine_id, logs):
        recorded.append((machine_id, logs))
        return True

    shipper = mod.LogShipper(machine_id="poste-42", sender=sender, max_batch=10)
    # Records are built lazily, one right before each emit.
    for record in (_make_record(msg=f"event {i}", args=None) for i in range(5)):
        shipper.handler.emit(record)

    shipped = shipper.flush()

    assert shipped == 5
    assert len(recorded) == 1

    seen_machine_id, seen_logs = recorded[0]
    assert seen_machine_id == "poste-42"
    assert len(seen_logs) == 5
    assert seen_logs[0]["message"] == "event 0"

    # The buffer is emptied after a successful send.
    assert shipper.peek_buffer() == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 4. sender échoue (False / exception) -> logs CONSERVÉS, rejoués au flush suivant
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_sender_echec_false_conserve_les_logs(mod):
    """A sender returning False keeps the logs; the next flush replays them in order."""
    state = {"fail": True, "received": None}

    def flaky_sender(machine_id, logs):
        if not state["fail"]:
            state["received"] = list(logs)
            return True
        # Recoverable failure: report it without raising.
        return False

    shipper = mod.LogShipper(machine_id="p", sender=flaky_sender)
    for record in (_make_record(msg=f"m{i}", args=None) for i in range(3)):
        shipper.handler.emit(record)

    # First attempt: the sender refuses the batch.
    first_try = shipper.flush()
    assert first_try == 0
    # ZERO loss: all three entries are still buffered.
    assert len(shipper.peek_buffer()) == 3

    # Second attempt: the sender now accepts, so the same entries are replayed.
    state["fail"] = False
    second_try = shipper.flush()
    assert second_try == 3

    replayed = [entry["message"] for entry in state["received"]]
    assert replayed == ["m0", "m1", "m2"]
    assert shipper.peek_buffer() == []
|
|
|
|
|
|
def test_sender_exception_conserve_les_logs(mod):
    """A sender that raises must not propagate from ``flush`` and must lose nothing."""

    def exploding_sender(machine_id, logs):
        raise ConnectionError("serveur down")

    shipper = mod.LogShipper(machine_id="p", sender=exploding_sender)
    record = _make_record(msg="important", args=None)
    shipper.handler.emit(record)

    # flush() must NOT let the sender's exception escape.
    shipped = shipper.flush()
    assert shipped == 0

    # The log entry is kept in the buffer.
    remaining = shipper.peek_buffer()
    assert len(remaining) == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 5. buffer vide -> sender NON appelé
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_buffer_vide_sender_non_appele(mod):
    """Flushing an empty buffer reports 0 and never invokes the sender."""
    calls = []

    def recording_sender(_mid, _batch):
        # Remember every invocation, then report success.
        calls.append((_mid, _batch))
        return True

    shipper = mod.LogShipper(machine_id="p", sender=recording_sender)

    shipped = shipper.flush()

    assert shipped == 0
    assert calls == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 6. > max_batch entrées -> découpage en plusieurs batches
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_decoupage_en_plusieurs_batches(mod):
    """More entries than ``max_batch`` are split across several sender calls."""
    batch_sizes = []

    def sender(machine_id, logs):
        batch_sizes.append(len(logs))
        return True

    shipper = mod.LogShipper(machine_id="p", sender=sender, max_batch=3)
    for record in (_make_record(msg=f"x{i}", args=None) for i in range(7)):
        shipper.handler.emit(record)

    shipped = shipper.flush()

    assert shipped == 7
    # 7 entries with max_batch=3 -> 3 + 3 + 1
    assert batch_sizes == [3, 3, 1]
    # No batch may exceed max_batch.
    assert not any(size > 3 for size in batch_sizes)
    assert shipper.peek_buffer() == []
|
|
|
|
|
|
def test_decoupage_echec_partiel_conserve_le_reste(mod):
    """If an intermediate batch fails, flushing stops and the rest is kept (zero loss)."""
    attempts = []

    def sender(machine_id, logs):
        attempts.append([entry["message"] for entry in logs])
        # Only the second call fails.
        if len(attempts) == 2:
            return False
        return True

    shipper = mod.LogShipper(machine_id="p", sender=sender, max_batch=2)
    for record in (_make_record(msg=f"x{i}", args=None) for i in range(6)):
        shipper.handler.emit(record)

    shipped = shipper.flush()

    # First batch (x0, x1) goes out; the second (x2, x3) fails -> stop there.
    assert shipped == 2
    assert attempts[0] == ["x0", "x1"]

    # x2..x5 remain in the buffer, in order.
    leftover = [entry["message"] for entry in shipper.peek_buffer()]
    assert leftover == ["x2", "x3", "x4", "x5"]
|