199 lines
7.5 KiB
Python
199 lines
7.5 KiB
Python
"""Mesure du gain perf RPA_SKIP_INTENTION_ENRICHMENT sur build_replay.
|
|
|
|
Harnais lecture seule : charge une fixture raw events réelle (smoke Bloc-notes
|
|
2026-05-20 - même session que replay_sess_e96e5822 18/18 du 2026-05-25) et
|
|
appelle directement build_replay_from_raw_events() sans déclencher dispatch
|
|
ni replay live.
|
|
|
|
Ne pas lancer en CI standard : test perf, run manuel uniquement.
|
|
|
|
Run :
|
|
.venv/bin/python -m pytest tests/integration/test_build_replay_perf.py \
|
|
-m performance -s -v
|
|
|
|
Référence : inbox_claude/2026-05-25_1244_codex-to-claude_recadrage-demo-1juin.md
|
|
(mission C2) et plan docs/plans/PLAN_STABILISATION_DEMO_2026-06-01.md
|
|
(P0 performance mesurable).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
# Repository root: two levels above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[2]

# Put the repository root at the front of sys.path, once, so that project
# packages can be imported when this file is run directly by pytest.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

# Recorded live session used as the input data set for these tests.
SESSION_DIR = ROOT.joinpath(
    "data",
    "training",
    "live_sessions",
    "DESKTOP-58D5CAC_windows",
    "sess_20260520T102916_066851",
)

# JSON Lines file holding the raw events of that session.
FIXTURE = SESSION_DIR.joinpath("live_events.jsonl")
|
|
|
|
|
|
def _load_raw_events(path: "Path | None" = None) -> list:
    """Load raw events from a JSON Lines file.

    Args:
        path: JSONL file to read. When omitted, the module-level ``FIXTURE``
            is used (per the original note: 55 raw events, 16 useful actions).

    Returns:
        One decoded JSON value per non-blank line, in file order.

    The calling test is skipped via ``pytest.skip`` when the file is missing.
    """
    source = FIXTURE if path is None else path
    if not source.exists():
        pytest.skip(f"Fixture absente : {source}")
    # JSON text is UTF-8: read it explicitly as such instead of relying on the
    # platform's default text encoding, which differs between systems.
    with source.open(encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
|
|
|
|
|
@pytest.fixture
def raw_events():
    """Provide the raw events returned by ``_load_raw_events()``."""
    events = _load_raw_events()
    return events
|
|
|
|
|
|
@pytest.fixture
def session_dir() -> str:
    """Provide the recorded session directory as a string path.

    Per the original note, passing this directory is what triggers the gemma4
    enrichment step when the directory is present. The requesting test is
    skipped when ``SESSION_DIR`` does not exist.
    """
    if SESSION_DIR.exists():
        return str(SESSION_DIR)
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip(f"Session dir absent : {SESSION_DIR}")
|
|
|
|
|
|
def _extract_perf_breakdown(caplog) -> list[tuple[str, float]]:
    """Extract the ``[PERF] build.<step>`` timing spans from captured logs.

    Expected message format:
        "[PERF] build.<step_name> session=<sid> elapsed_ms=<X>"

    Args:
        caplog: Object exposing ``records``, an iterable of log records whose
            ``getMessage()`` returns the formatted message (pytest's caplog).

    Returns:
        ``[(step_name, elapsed_ms)]`` in order of appearance. Records that do
        not match the expected format are ignored.
    """
    import re

    # The number is matched strictly (digits with at most one decimal point).
    # A looser character class would also capture trailing punctuation such as
    # the final "." in "elapsed_ms=12.5." and make float() raise ValueError.
    pattern = re.compile(
        r"\[PERF\] build\.(\S+) session=\S+ elapsed_ms=(\d+(?:\.\d*)?|\.\d+)"
    )
    matches = (pattern.search(record.getMessage()) for record in caplog.records)
    return [(m.group(1), float(m.group(2))) for m in matches if m]
|
|
|
|
|
|
@pytest.mark.performance
def test_build_replay_perf_skip_enrichment(monkeypatch, raw_events, session_dir, caplog):
    """Time build_replay_from_raw_events with and without RPA_SKIP_INTENTION_ENRICHMENT.

    Asserts:
      - the same number of actions is produced in both modes
      - skip mode yields 0 actions with a non-empty intention
      - full mode yields at least 1 action with an intention (evidence that
        the gemma4 enrichment ran)
      - skipping enrichment is at least 3x faster

    Both measurements are printed with an explicit [PERF] prefix (visible
    when pytest is run with -s).
    """
    import logging

    from agent_v0.server_v1.stream_processor import build_replay_from_raw_events

    def _timed_build(session_id):
        # Run one build, returning (actions, wall-clock ms, [PERF] spans logged).
        started = time.perf_counter()
        produced = build_replay_from_raw_events(
            raw_events, session_id=session_id, session_dir=session_dir
        )
        duration_ms = (time.perf_counter() - started) * 1000
        return produced, duration_ms, _extract_perf_breakdown(caplog)

    def _count_intentions(actions):
        # Number of actions carrying a truthy "intention" field.
        return sum(1 for action in actions if action.get("intention"))

    def _format_breakdown(title, spans):
        # Render one text line per span with a bar of 1 char per 500 ms.
        if not spans:
            return f" {title}: (aucun span [PERF] capturé)"
        rendered = [f" {title}:"]
        for step, ms in spans:
            bar = "█" * max(1, int(ms / 500))
            rendered.append(f" {step:40s} {ms:>7.0f} ms {bar}")
        return "\n".join(rendered)

    # Capture INFO logs of stream_processor so the [PERF] spans can be read back.
    caplog.set_level(logging.INFO, logger="agent_v0.server_v1.stream_processor")

    # First run: enrichment enabled (legacy behaviour), both switches unset.
    for variable in ("RPA_SKIP_INTENTION_ENRICHMENT", "RPA_SKIP_ENRICHMENT"):
        monkeypatch.delenv(variable, raising=False)
    actions_full, elapsed_full_ms, breakdown_full = _timed_build("perf_full")
    # Drop the first run's records so the second breakdown only sees its own.
    caplog.clear()

    # Second run: enrichment skipped (Phase 1 Codex 2026-05-25).
    monkeypatch.setenv("RPA_SKIP_INTENTION_ENRICHMENT", "1")
    actions_skip, elapsed_skip_ms, breakdown_skip = _timed_build("perf_skip")

    # Clamp the denominator to 1 ms to avoid dividing by a near-zero duration.
    speedup = elapsed_full_ms / max(1.0, elapsed_skip_ms)
    intentions_full = _count_intentions(actions_full)
    intentions_skip = _count_intentions(actions_skip)

    summary = (
        f"\n[PERF] build_replay events={len(raw_events)} "
        f"actions_full={len(actions_full)} actions_skip={len(actions_skip)} "
        f"full_ms={elapsed_full_ms:.0f} skip_ms={elapsed_skip_ms:.0f} "
        f"speedup={speedup:.1f}x "
        f"intentions_full={intentions_full} intentions_skip={intentions_skip}"
    )
    print(summary)

    # Per-step breakdown (C2b): helps identify the real optimisation targets
    # that remain once enrichment is skipped.
    print(_format_breakdown("Décomposition FULL", breakdown_full))
    print(_format_breakdown("Décomposition SKIP", breakdown_skip))

    # Invariant: same number of actions, only the intention fields are missing.
    assert len(actions_skip) == len(actions_full), (
        "Le skip ne doit pas changer le nombre d'actions "
        f"(full={len(actions_full)}, skip={len(actions_skip)})"
    )

    # Skip mode: no action may carry an enriched intention.
    assert intentions_skip == 0, (
        "Skip enrichment doit produire 0 intention non-vide "
        f"(observé : {intentions_skip})"
    )

    # Full mode: at least one action with an intention; otherwise gemma4
    # failed or the fixture has no eligible action. Zero is abnormal, so fail
    # loudly.
    assert intentions_full >= 1, (
        "Full enrichment doit produire au moins 1 intention non-vide "
        f"sur fixture {FIXTURE.name}. Si 0 → gemma4 indisponible ou fixture "
        "non éligible (toutes les actions filtrées avant enrichissement)."
    )

    # Minimum performance gain: 3x.
    # Actual measurement (2026-05-25, fixture with 16 actions, 9 enriched):
    # full=93.8s, skip=24.1s, speedup=3.9x.
    # Skip mode is not instantaneous (~24s) because other steps take time:
    # anchor-crop extraction for visual_mode clicks, consolidation with
    # ReplayLearner, wait normalisation, etc. Only gemma4 is skipped.
    # The initial 215x estimate assumed gemma4 was the only large cost; the
    # measurement invalidated that assumption.
    assert speedup >= 3.0, (
        f"Gain insuffisant : {speedup:.1f}x (attendu ≥ 3x). "
        "Soit gemma4 cache-hit, soit la fixture n'a pas d'action éligible, "
        f"soit Ollama indisponible (fallback rapide). full_ms={elapsed_full_ms:.0f}, "
        f"skip_ms={elapsed_skip_ms:.0f}."
    )
|
|
|
|
|
|
@pytest.mark.performance
def test_build_replay_skip_alias_works(monkeypatch, raw_events, session_dir):
    """Check that the RPA_SKIP_ENRICHMENT alias has the same effect."""
    from agent_v0.server_v1.stream_processor import build_replay_from_raw_events

    # Only the alias is set; the primary switch is explicitly removed.
    monkeypatch.delenv("RPA_SKIP_INTENTION_ENRICHMENT", raising=False)
    monkeypatch.setenv("RPA_SKIP_ENRICHMENT", "1")

    replay_actions = build_replay_from_raw_events(
        raw_events, session_id="perf_alias", session_dir=session_dir
    )
    with_intention = [item for item in replay_actions if item.get("intention")]
    intentions = len(with_intention)

    print(
        f"\n[PERF] alias RPA_SKIP_ENRICHMENT actions={len(replay_actions)} intentions={intentions}"
    )
    assert intentions == 0, (
        "L'alias RPA_SKIP_ENRICHMENT doit aussi désactiver l'enrichissement "
        f"(observé : {intentions} intentions)"
    )
|