feat(p1): persist workflows and semantic learning artifacts

2026-06-02 16:20:38 +02:00
parent 7a1a5cb6fd
commit 86b3c8f7e7
21 changed files with 3816 additions and 31 deletions
--- a/tests/integration/test_phase25_semantic_integration.py
+++ b/tests/integration/test_phase25_semantic_integration.py
@@ -0,0 +1,476 @@
+"""Tests d'intégration Phase 2.5 sémantique.
+
+Specs : ``docs/POC/SPECS_PHASE_25_SEMANTIQUE_2026-06-01.md``.
+
+Vérifie le flux complet :
+- Charger des screenshots disque -> identifier écrans distincts ->
+  analyser avec OmniParser mocké -> écrire ``.semantic.yaml`` valide.
+- Tester aussi le fallback OmniParser KO -> degraded YAML.
+- L'endpoint FastAPI est testé via ``TestClient`` (auth Bearer désactivée
+  via ``RPA_AUTH_DISABLED=true`` pour ce test).
+- Correctifs P1-SEMANTIQUE GO conditionnel Qwen :
+  * non-blocage event loop pendant analyze_frames (run_in_executor).
+  * timeout effectif autour de chaque appel OmniParser.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import sys
+import time
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import pytest
+import yaml
+from PIL import Image, ImageDraw
+
+_ROOT = str(Path(__file__).resolve().parents[2])
+if _ROOT not in sys.path:
+    sys.path.insert(0, _ROOT)
+
+from core.semantic import phase25_analyzer as P  # noqa: E402
+
+
+def _make_screenshot(path: Path, seed: int) -> None:
+    import random
+    rng = random.Random(seed)
+    img = Image.new("RGB", (320, 240), color=(255, 255, 255))
+    d = ImageDraw.Draw(img)
+    for _ in range(30):
+        x = rng.randint(0, 300)
+        y = rng.randint(0, 220)
+        w = rng.randint(10, 30)
+        h = rng.randint(10, 30)
+        col = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))
+        d.rectangle([x, y, x + w, y + h], fill=col)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    img.save(path, format="PNG")
+
+
+def _build_mock_omniparser(elements):
+    w = P._OmniParserSafeWrapper.__new__(P._OmniParserSafeWrapper)
+    w._adapter = MagicMock()
+    w._available = True
+    w._import_error = None
+    w._adapter.detect.return_value = elements
+    return w
+
+
+# ---------------------------------------------------------------------------
+# Integration : flux complet load -> analyze -> write YAML
+# ---------------------------------------------------------------------------
+
+
+class TestFullFlow:
+    def test_load_analyze_write_yaml(self, tmp_path, monkeypatch):
+        # Préparer 4 screenshots : 2 visuels identiques + 1 différent + 1 répété.
+        shots_dir = tmp_path / "shots"
+        _make_screenshot(shots_dir / "0.png", seed=1)
+        _make_screenshot(shots_dir / "1.png", seed=1)  # même seed = même image
+        _make_screenshot(shots_dir / "2.png", seed=2)
+        _make_screenshot(shots_dir / "3.png", seed=2)
+        paths = {i: str(shots_dir / f"{i}.png") for i in range(4)}
+
+        monkeypatch.setattr(P, "OMNIPARSER_CACHE_ROOT", tmp_path / "cache")
+
+        mock_op = _build_mock_omniparser([
+            {"label": "Valider", "bbox": [10, 10, 80, 40], "confidence": 0.9, "element_type": "button"},
+            {"label": "Champ texte", "bbox": [100, 10, 300, 40], "confidence": 0.85, "element_type": "input"},
+        ])
+        analyzer = P.Phase25Analyzer(session_id="integ_sess", omniparser=mock_op)
+        frames = P.load_frames_from_paths(paths)
+        assert len(frames) == 4
+        result = analyzer.analyze_frames(
+            frames=frames,
+            screenshot_paths=paths,
+            window_titles={0: "Easily Login", 2: "Easily Patient"},
+        )
+        # Doit avoir regroupé en 2 écrans distincts.
+        assert len(result.screens) == 2
+        assert result.too_complex is False
+        assert result.degraded is False
+        # Le window_title du représentant doit être propagé.
+        rep_indexes = {s.index for s in result.screens}
+        assert 0 in rep_indexes or 1 in rep_indexes
+        assert 2 in rep_indexes or 3 in rep_indexes
+
+        # Écrire le YAML.
+        target = analyzer.write_semantic_yaml(
+            result, slug="competence_demo", target_dir=tmp_path / "out",
+        )
+        assert target.exists()
+        data = yaml.safe_load(target.read_text(encoding="utf-8"))
+        assert data["competence_id"] == "competence_demo"
+        assert len(data["screens"]) == 2
+        # Au moins 1 button + 1 field doivent apparaître dans chaque structure.
+        for sc in data["screens"]:
+            assert len(sc["structure"]["buttons"]) >= 1
+            assert len(sc["structure"]["forms"]) >= 1
+
+    def test_omniparser_ko_fallback_yaml_degraded(self, tmp_path, monkeypatch):
+        shots_dir = tmp_path / "shots"
+        _make_screenshot(shots_dir / "0.png", seed=10)
+        paths = {0: str(shots_dir / "0.png")}
+
+        monkeypatch.setattr(P, "OMNIPARSER_CACHE_ROOT", tmp_path / "cache")
+        monkeypatch.setattr(P, "LOGS_DIR", tmp_path / "logs")
+        monkeypatch.setattr(
+            P, "OMNIPARSER_ERROR_LOG", tmp_path / "logs" / "omniparser_errors.log"
+        )
+        # docTR stub léger.
+        monkeypatch.setattr(
+            P, "_detect_via_doctr",
+            lambda image, screenshot_path: [
+                {"label": "FAKE_OCR", "text": "FAKE_OCR", "bbox": [0, 0, 50, 20], "confidence": 0.6},
+            ],
+        )
+
+        # OmniParser disponible mais qui lève.
+        op = P._OmniParserSafeWrapper.__new__(P._OmniParserSafeWrapper)
+        op._adapter = MagicMock()
+        op._available = True
+        op._import_error = None
+        op._adapter.detect.side_effect = RuntimeError("OmniParser HS")
+
+        analyzer = P.Phase25Analyzer(session_id="integ_fb", omniparser=op)
+        frames = P.load_frames_from_paths(paths)
+        result = analyzer.analyze_frames(frames=frames, screenshot_paths=paths)
+        assert result.degraded is True
+        assert len(result.screens) == 1
+        assert result.screens[0].degraded is True
+
+        # Le YAML doit être marqué dégradé mais valide.
+        target = analyzer.write_semantic_yaml(
+            result, slug="comp_fallback", target_dir=tmp_path / "out",
+        )
+        data = yaml.safe_load(target.read_text(encoding="utf-8"))
+        assert data["degraded"] is True
+        assert data["screens"][0]["degraded"] is True
+
+
+# ---------------------------------------------------------------------------
+# Integration : endpoint FastAPI
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="module")
+def app_client(tmp_path_factory):
+    """TestClient FastAPI avec auth Bearer désactivée."""
+    # IMPORTANT : désactiver l'auth AVANT l'import du module api_stream.
+    os.environ["RPA_AUTH_DISABLED"] = "true"
+    os.environ.setdefault("RPA_API_TOKEN", "test-token-phase25")
+
+    # Cache root pointe vers un tmp partagé.
+    cache_root = tmp_path_factory.mktemp("op_cache")
+
+    from fastapi.testclient import TestClient
+    from agent_v0.server_v1 import api_stream as api  # noqa: E402
+
+    # Redirige le cache et logs Phase 2.5 vers tmp.
+    from core.semantic import phase25_analyzer as PA
+    PA.OMNIPARSER_CACHE_ROOT = cache_root
+    PA.LOGS_DIR = cache_root / "logs"
+    PA.OMNIPARSER_ERROR_LOG = cache_root / "logs" / "omniparser_errors.log"
+
+    client = TestClient(api.app)
+    yield client, cache_root
+
+
+class TestEndpoint:
+    def test_endpoint_returns_empty_result_when_no_paths(self, app_client):
+        client, _cache = app_client
+        # Aucun frame réel n'existe -> on attend 200 avec screens=[].
+        resp = client.post(
+            "/api/v1/lea/screen/analyze",
+            json={
+                "session_id": "non_existent_session_xyz",
+                "screenshot_indexes": [0, 1, 2],
+            },
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["session_id"] == "non_existent_session_xyz"
+        assert data["screens"] == []
+        assert data["degraded"] is True
+
+    def test_endpoint_invalid_session_id(self, app_client):
+        client, _cache = app_client
+        resp = client.post(
+            "/api/v1/lea/screen/analyze",
+            json={
+                "session_id": "../etc/passwd",
+                "screenshot_indexes": [],
+            },
+        )
+        assert resp.status_code == 400
+        assert resp.json()["detail"]["error"] == "invalid_session_id"
+
+    def test_endpoint_full_flow_with_explicit_paths(self, app_client, tmp_path):
+        client, _cache = app_client
+        # Préparer 3 screenshots et passer les chemins explicitement.
+        shots = tmp_path / "endpoint_shots"
+        _make_screenshot(shots / "10.png", seed=5)
+        _make_screenshot(shots / "11.png", seed=5)
+        _make_screenshot(shots / "12.png", seed=99)
+        paths = {
+            "10": str(shots / "10.png"),
+            "11": str(shots / "11.png"),
+            "12": str(shots / "12.png"),
+        }
+
+        # Patcher OmniParserSafeWrapper pour qu'il soit toujours indisponible
+        # (fallback OCR-seul docTR) -> on stubbe docTR aussi.
+        from core.semantic import phase25_analyzer as PA
+
+        def _stub_doctr(image, screenshot_path):
+            return [
+                {"label": "TXT", "text": "TXT", "bbox": [0, 0, 30, 10], "confidence": 0.6},
+            ]
+
+        original_doctr = PA._detect_via_doctr
+        PA._detect_via_doctr = _stub_doctr
+
+        # Force fallback : on patche le wrapper pour qu'il soit unavailable.
+        original_wrapper_init = PA._OmniParserSafeWrapper._try_import
+
+        def _no_op_import(self):
+            self._adapter = None
+            self._available = False
+            self._import_error = "stubbed_unavailable"
+
+        PA._OmniParserSafeWrapper._try_import = _no_op_import
+
+        try:
+            resp = client.post(
+                "/api/v1/lea/screen/analyze",
+                json={
+                    "session_id": "endpoint_test",
+                    "screenshot_indexes": [10, 11, 12],
+                    "screenshot_paths": paths,
+                    "window_titles": {"10": "Win A", "12": "Win B"},
+                    "write_yaml": False,
+                },
+            )
+        finally:
+            PA._detect_via_doctr = original_doctr
+            PA._OmniParserSafeWrapper._try_import = original_wrapper_init
+
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        assert data["session_id"] == "endpoint_test"
+        # 2 écrans distincts attendus (idx 10/11 groupés + idx 12).
+        assert 1 <= len(data["screens"]) <= 3
+        # Mode dégradé -> text_blocks docTR.
+        assert data["degraded"] is True
+        # Contrat snapshot : présence de "elements" aplatis.
+        for sc in data["screens"]:
+            assert "elements" in sc
+            assert "structure" in sc
+            assert "hash" in sc
+
+
+# ---------------------------------------------------------------------------
+# CORRECTIFS P1-SEMANTIQUE GO conditionnel Qwen :
+# 1. Non-blocage event loop pendant analyze_frames (run_in_executor).
+# 2. Timeout effectif autour de chaque appel OmniParser + log dédié.
+# ---------------------------------------------------------------------------
+
+
+class TestEventLoopNotBlocked:
+    """L'endpoint POST /api/v1/lea/screen/analyze ne doit pas bloquer
+    l'event loop pendant l'analyse synchrone (analyze_frames).
+
+    Méthode : monkeypatch ``analyze_frames`` pour qu'il dorme 2s, lancer
+    2 appels en parallèle via httpx.AsyncClient + ASGITransport, vérifier
+    que la durée totale est sensiblement < 4s (donc < ~3s) car les deux
+    analyses doivent progresser en concurrent dans le ThreadPoolExecutor.
+    """
+
+    @pytest.mark.asyncio
+    async def test_endpoint_does_not_block_event_loop(self, tmp_path, monkeypatch):
+        import httpx
+        from httpx import ASGITransport
+
+        os.environ["RPA_AUTH_DISABLED"] = "true"
+        os.environ.setdefault("RPA_API_TOKEN", "test-token-phase25-loop")
+
+        from agent_v0.server_v1 import api_stream as api
+        from core.semantic import phase25_analyzer as PA
+
+        # Préparer 1 screenshot pour passer la validation paths.
+        shots = tmp_path / "shots_loop"
+        _make_screenshot(shots / "0.png", seed=7)
+
+        # Monkeypatch analyze_frames pour dormir 2s (simule analyse longue
+        # CPU-bound côté analyzer). Si l'endpoint bloque l'event loop,
+        # 2 appels concurrents prendront ~4s ; sinon ~2s (executor en //).
+        sleep_sec = 2.0
+
+        class _DummyResult:
+            def __init__(self, sid):
+                self.session_id = sid
+                self.generated_at = "2026-06-01T00:00:00Z"
+                self.omniparser_available = False
+                self.degraded = False
+                self.too_complex = False
+                self.screens = []
+
+            def to_dict(self):
+                return {
+                    "session_id": self.session_id,
+                    "generated_at": self.generated_at,
+                    "omniparser_available": self.omniparser_available,
+                    "degraded": self.degraded,
+                    "too_complex": self.too_complex,
+                    "healthcheck_passed": True,
+                    "healthcheck_reason": None,
+                    "screens": [],
+                }
+
+        def _slow_analyze(self, *args, **kwargs):
+            time.sleep(sleep_sec)
+            return _DummyResult(self.session_id)
+
+        monkeypatch.setattr(PA.Phase25Analyzer, "analyze_frames", _slow_analyze)
+
+        transport = ASGITransport(app=api.app)
+        async with httpx.AsyncClient(
+            transport=transport, base_url="http://test"
+        ) as client:
+            paths = {"0": str(shots / "0.png")}
+            payload = {
+                "session_id": "loop_sess_a",
+                "screenshot_indexes": [0],
+                "screenshot_paths": paths,
+            }
+            payload2 = {**payload, "session_id": "loop_sess_b"}
+
+            t0 = time.monotonic()
+            r1, r2 = await asyncio.gather(
+                client.post("/api/v1/lea/screen/analyze", json=payload),
+                client.post("/api/v1/lea/screen/analyze", json=payload2),
+            )
+            elapsed = time.monotonic() - t0
+
+        assert r1.status_code == 200, r1.text
+        assert r2.status_code == 200, r2.text
+        # Marge confortable : si event loop bloqué -> ~4s, sinon ~2s.
+        # On accepte jusqu'à 3.5s pour absorber overhead CI / GC.
+        assert elapsed < 3.5, (
+            f"event loop semble bloque : 2 appels paralleles ont pris {elapsed:.2f}s "
+            f"(attendu < 3.5s avec sleep={sleep_sec}s)"
+        )
+
+
+class TestOmniParserTimeout:
+    """Le ``OMNIPARSER_TIMEOUT_SEC`` doit être appliqué comme timeout dur
+    autour de chaque appel ``_adapter.detect`` via ThreadPoolExecutor +
+    ``future.result(timeout=...)``.
+
+    Si OmniParser hang -> ``concurrent.futures.TimeoutError`` -> log dédié
+    + ``degraded=True`` + fallback docTR. Pas de 500 vers le caller.
+    """
+
+    def test_omniparser_timeout_triggers_fallback(self, tmp_path, monkeypatch):
+        # Forcer un timeout court (0.5s) pour ne pas bloquer la suite de tests.
+        monkeypatch.setattr(P, "OMNIPARSER_TIMEOUT_SEC", 0.5)
+        monkeypatch.setattr(P, "OMNIPARSER_CACHE_ROOT", tmp_path / "cache")
+        monkeypatch.setattr(P, "LOGS_DIR", tmp_path / "logs")
+        monkeypatch.setattr(
+            P, "OMNIPARSER_ERROR_LOG", tmp_path / "logs" / "omniparser_errors.log"
+        )
+
+        # Stub docTR : retourne 1 text_block pour confirmer le fallback.
+        monkeypatch.setattr(
+            P, "_detect_via_doctr",
+            lambda image, screenshot_path: [
+                {"label": "FALLBACK_OCR", "text": "FALLBACK_OCR",
+                 "bbox": [0, 0, 30, 10], "confidence": 0.6},
+            ],
+        )
+
+        # OmniParser disponible mais qui hang (dort > timeout).
+        op = P._OmniParserSafeWrapper.__new__(P._OmniParserSafeWrapper)
+        op._adapter = MagicMock()
+        op._available = True
+        op._import_error = None
+
+        def _hang(image):
+            time.sleep(5.0)  # >> 0.5s timeout
+            return []
+
+        op._adapter.detect.side_effect = _hang
+
+        analyzer = P.Phase25Analyzer(
+            session_id="timeout_sess",
+            omniparser=op,
+            timeout_sec=0.5,
+        )
+        img = Image.new("RGB", (64, 64), color=(255, 255, 255))
+
+        t0 = time.monotonic()
+        # On contourne le healthcheck (qui consommerait aussi un timeout).
+        result = analyzer.analyze_screen(
+            frame_index=0, image=img, phash="ab", force_fallback=False,
+        )
+        elapsed = time.monotonic() - t0
+
+        # Timeout effectif : doit terminer en ~0.5s (large marge à 3s
+        # pour CPU lent).
+        assert elapsed < 3.0, (
+            f"timeout pas applique : analyze_screen a pris {elapsed:.2f}s "
+            f"(attendu ~0.5s, hang stub = 5s)"
+        )
+        # Fallback déclenché.
+        assert result.degraded is True
+        assert result.degraded_reason and "omniparser_exception" in result.degraded_reason
+        assert "TimeoutError" in result.degraded_reason
+        # docTR a pris la main.
+        assert len(result.structure.text_blocks) == 1
+        assert result.structure.text_blocks[0]["label"] == "FALLBACK_OCR"
+
+    def test_omniparser_timeout_logged(self, tmp_path, monkeypatch):
+        """Vérifie qu'une ligne JSON est ajoutée dans
+        ``logs/omniparser_errors.log`` avec motif timeout.
+        """
+        monkeypatch.setattr(P, "OMNIPARSER_TIMEOUT_SEC", 0.3)
+        monkeypatch.setattr(P, "OMNIPARSER_CACHE_ROOT", tmp_path / "cache")
+        monkeypatch.setattr(P, "LOGS_DIR", tmp_path / "logs")
+        log_path = tmp_path / "logs" / "omniparser_errors.log"
+        monkeypatch.setattr(P, "OMNIPARSER_ERROR_LOG", log_path)
+        monkeypatch.setattr(
+            P, "_detect_via_doctr",
+            lambda image, screenshot_path: [],
+        )
+
+        op = P._OmniParserSafeWrapper.__new__(P._OmniParserSafeWrapper)
+        op._adapter = MagicMock()
+        op._available = True
+        op._import_error = None
+
+        def _hang(image):
+            time.sleep(3.0)
+            return []
+
+        op._adapter.detect.side_effect = _hang
+
+        analyzer = P.Phase25Analyzer(
+            session_id="timeout_log_sess",
+            omniparser=op,
+            timeout_sec=0.3,
+        )
+        img = Image.new("RGB", (64, 64), color=(255, 255, 255))
+        result = analyzer.analyze_screen(
+            frame_index=42, image=img, phash="cd", force_fallback=False,
+        )
+
+        assert result.degraded is True
+        assert log_path.exists(), "le log d'erreur omniparser doit etre cree"
+        log_content = log_path.read_text(encoding="utf-8")
+        # Le log est append-only JSON-lines. On vérifie qu'au moins une
+        # ligne contient TimeoutError + session_id.
+        assert "TimeoutError" in log_content
+        assert "timeout_log_sess" in log_content
+        assert '"frame_index": 42' in log_content
--- a/tests/integration/test_shadow_full_cycle.py
+++ b/tests/integration/test_shadow_full_cycle.py
@@ -0,0 +1,245 @@
+"""Tests integration HTTP — cycle complet Lea-first jusqu'au /persist.
+
+Specs : docs/POC/SPECS_ENDPOINT_PERSIST_2026-06-01.md
+
+Le focus est sur l'endpoint /persist : on mocke les phases amont (Shadow
+build) en construisant directement le workflow_ir. Pas de lancement reel
+de session Shadow ici.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+_ROOT = str(Path(__file__).resolve().parents[2])
+if _ROOT not in sys.path:
+    sys.path.insert(0, _ROOT)
+
+
+pytestmark = pytest.mark.integration
+
+
+_TEST_API_TOKEN = "test_persist_endpoint_token_xyz"
+
+
+@pytest.fixture
+def persist_client(monkeypatch, tmp_path):
+    """TestClient FastAPI + redirection des chemins persist vers tmp_path."""
+    monkeypatch.setenv("RPA_API_TOKEN", _TEST_API_TOKEN)
+    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
+
+    # DB agents isolee pour eviter le guard fleet
+    monkeypatch.setenv("RPA_AGENTS_DB_PATH", str(tmp_path / "agents.db"))
+
+    from fastapi.testclient import TestClient
+    from agent_v0.server_v1 import api_stream
+    from agent_v0.server_v1.agent_registry import AgentRegistry
+    from core.competences import persist as P
+
+    monkeypatch.setattr(api_stream, "API_TOKEN", _TEST_API_TOKEN)
+
+    # api_stream peut deja etre importe par un autre test avant que
+    # RPA_AGENTS_DB_PATH soit pose. Isoler explicitement le registre global.
+    test_registry = AgentRegistry(db_path=str(tmp_path / "agents.db"))
+    monkeypatch.setattr(api_stream, "agent_registry", test_registry)
+
+    # Rediriger toutes les ecritures vers tmp_path
+    candidate_dir = tmp_path / "competences" / "candidate"
+    candidate_dir.mkdir(parents=True, exist_ok=True)
+    audit_path = tmp_path / "competences" / "persist_audit.jsonl"
+    incomplete_path = tmp_path / "competences" / "incomplete_learnings.jsonl"
+
+    monkeypatch.setattr(P, "COMPETENCES_ROOT", tmp_path / "competences")
+    monkeypatch.setattr(P, "CANDIDATE_DIR", candidate_dir)
+    monkeypatch.setattr(P, "AUDIT_PATH", audit_path)
+    monkeypatch.setattr(P, "INCOMPLETE_PATH", incomplete_path)
+
+    # Reset du rate limiter pour eviter les fuites entre tests
+    P.persist_rate_limiter.reset()
+
+    client = TestClient(api_stream.app, raise_server_exceptions=False)
+    return client, tmp_path
+
+
+def _auth_headers():
+    return {"Authorization": f"Bearer {_TEST_API_TOKEN}"}
+
+
+def _minimal_workflow_ir():
+    return {
+        "steps": [
+            {
+                "kind": "click",
+                "primitive_ref": "click",
+                "parameters": {"target": "Bouton OK"},
+                "description": "Clic sur Bouton OK",
+            }
+        ],
+        "preconditions": [],
+        "success_marker": {
+            "mode": "all_of",
+            "timeout_ms": 5000,
+            "markers": [],
+        },
+    }
+
+
+class TestPersistEndpointSuccess:
+    """Cas nominal : payload valide -> 201 + YAML cree + audit ecrit."""
+
+    def test_persist_returns_201_and_writes_yaml(self, persist_client):
+        client, tmp_path = persist_client
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Test Cycle Complet",
+                "machine_id": "machine_test_x",
+                "session_id": "sess_xyz",
+                "workflow_ir": _minimal_workflow_ir(),
+                "parameters": [],
+                "annotations_semantiques": {"intent_fr": "tester cycle"},
+                "learning_metadata": {"persist_id": "uuid-cycle-1"},
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 201, resp.text
+        body = resp.json()
+        assert body["competence_id"] == "test_cycle_complet"
+        assert body["learning_state"] == "candidate"
+        assert body["persist_id"] == "uuid-cycle-1"
+        assert body["audit_entry_id"] >= 1
+
+        # Le YAML doit etre present sur disque (chemin redirige par fixture)
+        yaml_file = tmp_path / "competences" / "candidate" / "test_cycle_complet.yaml"
+        assert yaml_file.exists()
+        # Audit enrichi
+        audit = tmp_path / "competences" / "persist_audit.jsonl"
+        assert audit.exists()
+        lines = audit.read_text(encoding="utf-8").strip().splitlines()
+        assert any(json.loads(li).get("persist_id") == "uuid-cycle-1" for li in lines)
+
+    def test_persist_idempotence_returns_previous(self, persist_client):
+        client, _ = persist_client
+        payload = {
+            "name": "Idempotent Test",
+            "machine_id": "machine_test_x",
+            "workflow_ir": _minimal_workflow_ir(),
+            "learning_metadata": {"persist_id": "uuid-idem-1"},
+        }
+        r1 = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json=payload,
+            headers=_auth_headers(),
+        )
+        assert r1.status_code == 201
+        r2 = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json=payload,
+            headers=_auth_headers(),
+        )
+        # Idempotent : 200 ou 201 avec idempotent_replay=True
+        assert r2.status_code in (200, 201)
+        body2 = r2.json()
+        assert body2.get("idempotent_replay") is True
+        assert body2["competence_id"] == r1.json()["competence_id"]
+
+
+class TestPersistEndpointEdgeCases:
+    """Cas particuliers documentes dans specs §5."""
+
+    def test_partial_true_force_incomplete_state(self, persist_client):
+        client, tmp_path = persist_client
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Partial Demo",
+                "machine_id": "machine_test_x",
+                "workflow_ir": {"steps": []},  # vide accepte si partial
+                "learning_metadata": {
+                    "persist_id": "uuid-partial-1",
+                    "partial": True,
+                    "dropout_reason": "utilisateur a abandonne",
+                },
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 201, resp.text
+        assert resp.json()["learning_state"] == "incomplete"
+        # Double entree : audit principal + incomplete
+        incomplete = tmp_path / "competences" / "incomplete_learnings.jsonl"
+        assert incomplete.exists()
+
+    def test_empty_workflow_without_partial_returns_400(self, persist_client):
+        client, _ = persist_client
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Empty Demo",
+                "machine_id": "machine_test_x",
+                "workflow_ir": {"steps": []},
+                "learning_metadata": {"persist_id": "uuid-empty-1"},
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 400
+        assert resp.json()["detail"]["error"] == "empty_workflow_ir"
+
+    def test_partial_without_dropout_reason_returns_400(self, persist_client):
+        client, _ = persist_client
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Partial Sans Raison",
+                "machine_id": "machine_test_x",
+                "workflow_ir": {"steps": []},
+                "learning_metadata": {
+                    "persist_id": "uuid-partial-noreason",
+                    "partial": True,
+                },
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 400
+        assert resp.json()["detail"]["error"] == "dropout_reason_required"
+
+    def test_stable_request_forced_to_candidate(self, persist_client):
+        client, _ = persist_client
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Force Candidate Demo",
+                "machine_id": "machine_test_x",
+                "workflow_ir": _minimal_workflow_ir(),
+                "learning_metadata": {
+                    "persist_id": "uuid-stable-attempt",
+                    "learning_state": "stable",
+                },
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 201
+        # Regle d'or HDS : jamais stable par persist direct
+        assert resp.json()["learning_state"] == "candidate"
+
+    def test_slug_collision_returns_409(self, persist_client):
+        client, tmp_path = persist_client
+        # Pre-creer un YAML candidate pour declencher la collision
+        candidate = tmp_path / "competences" / "candidate" / "collision_demo.yaml"
+        candidate.write_text("id: collision_demo\nname: x\n", encoding="utf-8")
+
+        resp = client.post(
+            "/api/v1/lea/competences/candidate/persist",
+            json={
+                "name": "Collision Demo",
+                "machine_id": "machine_test_x",
+                "workflow_ir": _minimal_workflow_ir(),
+                "learning_metadata": {"persist_id": "uuid-coll-1"},
+            },
+            headers=_auth_headers(),
+        )
+        assert resp.status_code == 409
+        assert resp.json()["detail"]["error"] == "slug_collision"