diff --git a/tests/unit/test_f5_nom_compose_orphelin.py b/tests/unit/test_f5_nom_compose_orphelin.py
new file mode 100644
index 0000000..8dc6d6b
--- /dev/null
+++ b/tests/unit/test_f5_nom_compose_orphelin.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""
+Test de non-regression pour le fix F5 (commit 299bbee).
+
+F5 : post-passe masquant la continuation orpheline d'un nom compose coupe
+par un saut de ligne dans le format Trackare en colonnes.
+
+Cas reproduit :
+    ... 07:55 NOCENT-
+    EJNAINI
+
+Le nom "NOCENT-EJNAINI" est eclate sur deux lignes. Le NER ligne par ligne
+ne peut pas les assembler. Le 1er composant (NOCENT-) est masque via un
+autre artefact de remplacement, mais le 2e (EJNAINI) reste orphelin en clair.
+
+F5 ajoute une regex post-masquage qui detecte "[NOM]-\\n<TOKEN_MAJUSCULE>"
+et masque le token orphelin. Le token doit etre directement apres le saut
+de ligne (whitespace accepte), pas apres un autre texte.
+
+Source : anonymizer_core_refactored_onnx.py, lignes ~4505-4516,
+fonction process_pdf(), bloc "3a-bis) Nettoyage post-masquage".
+"""
+from __future__ import annotations
+
+import re
+
+import pytest
+
+from anonymizer_core_refactored_onnx import PLACEHOLDERS
+
+# ---------------------------------------------------------------------------
+# F5 regex, copied here for unit testing (stated to be identical to process_pdf).
+# NOTE(review): `\s*` already spans "\n", so `\n?` is redundant -- confirm intent.
+_RE_NOM_ORPHAN = re.compile(
+    r"(\[NOM\]-\s*\n?\s*)([A-Z\u00C0-\u0178][A-Z\u00C0-\u0178'\-]{3,})\b"
+)
+
+
+def _apply_f5_nom_orphan(text: str) -> tuple[str, list]:
+    """Run the F5 post-pass over an orphan compound-name continuation.
+
+    Returns the cleaned text plus the list of masked tokens (for audit).
+    The logic is stated to mirror step 3a-bis of process_pdf().
+    """
+    # Medical stop-words never masked (said to be the same list as process_pdf).
+    stop_words = {
+        "ampoule", "ampoules", "comprime", "comprimes", "gelule", "gelules",
+        "solution", "solutions", "traitement", "traitements", "injection",
+        "perfusions", "prescription", "posologie", "diagnostic", "examen",
+        "resultat", "resultats", "observation", "antibiogramme", "bacterio",
+    }
+    masked_tokens = []
+
+    def _mask_orphan(match):
+        token = match.group(2)
+        if token.lower() not in stop_words:
+            masked_tokens.append(token)
+            return match.group(1) + PLACEHOLDERS["NOM"]
+        # Stop-word: hand the matched span back unchanged.
+        return match.group(0)
+
+    result = _RE_NOM_ORPHAN.sub(_mask_orphan, text)
+    return result, masked_tokens
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestF5NomComposeOrphelin:
+    """F5: orphan continuation of a compound name split by a line break."""
+
+    # -- Regex only --
+
+    def test_f5_regex_matches_nom_orphan_direct_after_dash_newline(self):
+        """The F5 regex captures an uppercase token right after [NOM]-\\n."""
+        text = "[NOM]-\nEJNAINI"
+        match = _RE_NOM_ORPHAN.search(text)
+        assert match is not None
+        assert match.group(1) == "[NOM]-\n"
+        assert match.group(2) == "EJNAINI"
+
+    def test_f5_regex_matches_with_leading_spaces_on_next_line(self):
+        """The F5 regex tolerates leading spaces on the following line."""
+        text = "[NOM]-\n  EJNAINI"
+        match = _RE_NOM_ORPHAN.search(text)
+        assert match is not None
+        assert match.group(2) == "EJNAINI"
+
+    def test_f5_regex_matches_with_trailing_spaces_before_newline(self):
+        """The F5 regex tolerates spaces before the line break."""
+        text = "[NOM]-  \n  EJNAINI"
+        match = _RE_NOM_ORPHAN.search(text)
+        assert match is not None
+        assert match.group(2) == "EJNAINI"
+
+    def test_f5_regex_no_match_when_intervening_text(self):
+        """The F5 regex does NOT match when other text sits between [NOM]-\\n
+        and the token, i.e. the token is not a direct continuation of the
+        compound name (e.g. another column of the Trackare table)."""
+        text = "[NOM]-\nAmpoule(s) EJNAINI"
+        # "Ampoule(s)" is not all uppercase, so it cannot be group 2, and
+        # EJNAINI does not directly follow \n\s*.
+        match = _RE_NOM_ORPHAN.search(text)
+        assert match is None, (
+            "F5 ne doit pas matcher quand du texte separe [NOM]- du token orphelin"
+        )
+
+    def test_f5_regex_rejects_lowercase_start(self):
+        """A token starting with a lowercase letter is not captured."""
+        match = _RE_NOM_ORPHAN.search("[NOM]-\nejnaini")
+        assert match is None
+
+    def test_f5_regex_minimum_length_4_chars(self):
+        """The token must be at least 4 characters long (1 + {3,})."""
+        assert _RE_NOM_ORPHAN.search("[NOM]-\nABC") is None, "3 chars = trop court"
+        assert _RE_NOM_ORPHAN.search("[NOM]-\nABCD") is not None, "4 chars = OK"
+
+    # -- Applying F5 --
+
+    def test_f5_apply_masks_orphan_token(self):
+        """_apply_f5_nom_orphan swaps the orphan token for the placeholder."""
+        text = "[NOM]-\nEJNAINI"
+        cleaned, hits = _apply_f5_nom_orphan(text)
+        assert hits == ["EJNAINI"]
+        assert "[NOM]-" in cleaned
+        assert "EJNAINI" not in cleaned
+        # Both halves of the compound name must be masked.
+        assert cleaned.count(PLACEHOLDERS["NOM"]) == 2
+
+    def test_f5_apply_preserves_context_around_orphan(self):
+        """Text surrounding the orphan name is left untouched."""
+        text = "07:55 [NOM]-\nEJNAINI\nSuite du traitement"
+        cleaned, hits = _apply_f5_nom_orphan(text)
+        assert hits == ["EJNAINI"]
+        assert "07:55 " in cleaned
+        assert "Suite du traitement" in cleaned
+        assert "EJNAINI" not in cleaned
+
+    def test_f5_apply_multiple_orphans(self):
+        """F5 masks several orphans within the same text."""
+        text = "[NOM]-\nDUPONT\nAutre [NOM]-\nMARTIN"
+        cleaned, hits = _apply_f5_nom_orphan(text)
+        assert len(hits) == 2
+        assert "DUPONT" not in cleaned
+        assert "MARTIN" not in cleaned
+        assert cleaned.count(PLACEHOLDERS["NOM"]) == 4  # 2 initial + 2 orphans
+
+    def test_f5_no_false_positive_on_normal_text(self):
+        """F5 leaves text without a [NOM]-\\n<TOKEN> pattern unchanged."""
+        text = "Patient presente le [DATE]. Traitement prescrit."
+        cleaned, hits = _apply_f5_nom_orphan(text)
+        assert hits == []
+        assert cleaned == text
+
+    def test_f5_apply_keeps_medical_stop_word(self):
+        """A medical stop-word after [NOM]- stays in clear and is not audited."""
+        text = "[NOM]-\nTRAITEMENT"
+        cleaned, hits = _apply_f5_nom_orphan(text)
+        assert hits == []
+        assert cleaned == text
+
+    # -- Real Trackare case --
+
+    def test_f5_full_trackare_scenario(self):
+        """Full Trackare case: NOCENT-EJNAINI split across two column lines.
+
+        Column extraction yields "07:55 NOCENT-" followed by "EJNAINI" on the
+        next line. Initial (pre-F5) masking leaves "07:55 [NOM]-\\nEJNAINI";
+        F5 must turn that into "07:55 [NOM]-\\n[NOM]".
+        """
+        # Input simulating the pre-F5 result (NOCENT masked, EJNAINI orphan).
+        pre_f5 = "07:55 [NOM]-\nEJNAINI"
+
+        cleaned, hits = _apply_f5_nom_orphan(pre_f5)
+
+        assert "[NOM]-" in cleaned, "Le 1er composant doit rester masque"
+        assert "EJNAINI" not in cleaned, "Le 2e composant orphelin doit etre masque par F5"
+        # Exact output: only the orphan token is swapped for the placeholder.
+        assert cleaned == "07:55 [NOM]-\n" + PLACEHOLDERS["NOM"]
+        assert cleaned.count(PLACEHOLDERS["NOM"]) == 2, (
+            "Les deux parties du nom compose doivent etre masquees"
+        )
+        assert hits == ["EJNAINI"], "EJNAINI doit etre loggue dans l'audit"
+
+    def test_f5_trackare_with_spaces_in_column_alignment(self):
+        """Trackare case with column-alignment spaces around the tokens."""
+        pre_f5 = "07:55  [NOM]-  \n  EJNAINI  \nSuite"
+        cleaned, hits = _apply_f5_nom_orphan(pre_f5)
+        assert hits == ["EJNAINI"]
+        assert "EJNAINI" not in cleaned
+        assert "Suite" in cleaned
+
+    def test_f5_nom_compose_with_apostrophe_and_dash(self):
+        """Orphan tokens containing a dash or an apostrophe are masked whole."""
+        for token in ("DUPONT-MARTIN", "D'ALEMBERT"):
+            cleaned, hits = _apply_f5_nom_orphan("[NOM]-\n" + token)
+            assert hits == [token]
+            assert token not in cleaned
+
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main([__file__, "-v"]))  # propagate pytest's exit code
diff --git a/tests/unit/test_gui_batch_paths.py b/tests/unit/test_gui_batch_paths.py
new file mode 100644
index 0000000..0cda0c3
--- /dev/null
+++ b/tests/unit/test_gui_batch_paths.py
@@ -0,0 +1,70 @@
+from pathlib import Path
+
+from gui_batch_paths import (
+    build_batch_output_dir,
+    iter_pseudonymized_texts,
+    list_supported_documents,
+)
+
+
+def test_list_supported_documents_excludes_gui_output_tree(tmp_path: Path):
+    """Files under the "anonymise" output tree are not listed as inputs."""
+    root = tmp_path / "cases"
+    first_case = root / "001_patient_header_and_birth"
+    second_case = root / "002_contact_bundle"
+    output_case = root / "anonymise" / "001_patient_header_and_birth"
+    for folder in (first_case, second_case, output_case):
+        folder.mkdir(parents=True)
+
+    source_txt = first_case / "test.txt"
+    source_pdf = second_case / "source.pdf"
+    output_txt = output_case / "test.pseudonymise.txt"
+    # The .pdf holds plain text: presumably only the suffix matters to the listing.
+    source_txt.write_text("source", encoding="utf-8")
+    source_pdf.write_text("pdf", encoding="utf-8")
+    output_txt.write_text("output", encoding="utf-8")
+
+    listed = list_supported_documents(root, {".txt", ".pdf"})
+
+    assert listed == [source_txt, source_pdf]
+
+
+def test_build_batch_output_dir_preserves_relative_parent(tmp_path: Path):
+    """A source in a sub-folder maps to that same sub-folder under the output root."""
+    root = tmp_path / "cases"
+    output_root = root / "anonymise"
+    case_dir = root / "010_spaced_establishment_header"
+    case_dir.mkdir(parents=True)
+    source = case_dir / "test.txt"
+    source.write_text("test", encoding="utf-8")
+    expected = output_root / "010_spaced_establishment_header"
+    assert build_batch_output_dir(root, output_root, source) == expected
+
+
+def test_build_batch_output_dir_keeps_root_files_at_output_root(tmp_path: Path):
+    """A source sitting directly in the root maps to the output root itself."""
+    root = tmp_path / "cases"
+    root.mkdir(parents=True)
+    output_root = root / "anonymise"
+    source = root / "test.txt"
+    source.write_text("test", encoding="utf-8")
+
+    resolved = build_batch_output_dir(root, output_root, source)
+    assert resolved == output_root
+
+
+def test_iter_pseudonymized_texts_is_recursive(tmp_path: Path):
+    """Pseudonymised texts are found at any depth; the audit file is not returned."""
+    output_root = tmp_path / "anonymise"
+    case_dir = output_root / "001_patient_header_and_birth"
+    case_dir.mkdir(parents=True)
+    summary_txt = output_root / "summary.pseudonymise.txt"
+    case_txt = case_dir / "test.pseudonymise.txt"
+    audit_log = case_dir / "audit.jsonl"
+    contents = ((summary_txt, "top"), (case_txt, "nested"), (audit_log, "{}"))
+    for path, payload in contents:
+        path.write_text(payload, encoding="utf-8")
+
+    # Sort so the assertion does not depend on the iteration order.
+    results = sorted(iter_pseudonymized_texts(output_root))
+    assert results == [case_txt, summary_txt]
diff --git a/tests/unit/test_manual_masking.py b/tests/unit/test_manual_masking.py
new file mode 100644
index 0000000..ee00c4d
--- /dev/null
+++ b/tests/unit/test_manual_masking.py
@@ -0,0 +1,60 @@
+from pathlib import Path
+
+from manual_masking import (
+    DEFAULT_MASK_OUTPUT_DIRNAME,
+    DEFAULT_MASK_PREVIEW_DIRNAME,
+    append_jsonl_file,
+    ensure_mask_templates_dir,
+    list_mask_templates,
+    mask_templates_dir,
+    mask_template_label,
+    resolve_manual_mask_pdf,
+)
+
+
+def test_mask_templates_dir_is_under_config():
+    root = Path("/tmp/anonymisation")  # expected layout: <root>/config/mask_templates
+    assert mask_templates_dir(root) == root.joinpath("config", "mask_templates")
+
+
+def test_ensure_mask_templates_dir_creates_folder(tmp_path: Path):
+    templates_dir = ensure_mask_templates_dir(tmp_path)
+    assert templates_dir.is_dir()  # the folder exists after the call
+    assert templates_dir == tmp_path.joinpath("config", "mask_templates")
+
+
+def test_resolve_manual_mask_pdf_accepts_only_pdf():
+    """Lower/upper-case .pdf paths come back equal; .docx and None give None."""
+    for pdf_path in (Path("/tmp/test.pdf"), Path("/tmp/test.PDF")):
+        assert resolve_manual_mask_pdf(pdf_path) == pdf_path
+    assert all(resolve_manual_mask_pdf(p) is None for p in (Path("/tmp/test.docx"), None))
+
+
+def test_manual_mask_outputs_follow_project_convention():
+    dirnames = (DEFAULT_MASK_OUTPUT_DIRNAME, DEFAULT_MASK_PREVIEW_DIRNAME)
+    assert dirnames == ("anonymise", "anonymise_preview")  # pinned default folder names
+
+
+def test_list_mask_templates_filters_supported_extensions(tmp_path: Path):
+    """The .yml and nested .json templates are listed; the .txt file is not."""
+    templates_dir = ensure_mask_templates_dir(tmp_path)
+    yml_template = templates_dir / "alpha.yml"
+    txt_file = templates_dir / "beta.txt"
+    json_template = templates_dir / "nested" / "gamma.json"
+    json_template.parent.mkdir(parents=True)
+    for path in (yml_template, txt_file, json_template):
+        path.write_text("x", encoding="utf-8")
+
+    assert list_mask_templates(tmp_path) == [yml_template, json_template]
+    assert mask_template_label(json_template, tmp_path) == "nested/gamma.json"
+
+
+def test_append_jsonl_file_appends_non_empty_content(tmp_path: Path):
+    """Appending a non-empty JSONL file adds its content after the target's."""
+    destination = tmp_path / "target.jsonl"
+    addition = tmp_path / "extra.jsonl"
+    destination.write_text('{"kind":"A"}\n', encoding="utf-8")
+    addition.write_text('{"kind":"B"}\n', encoding="utf-8")
+
+    append_jsonl_file(destination, addition)
+    assert destination.read_text(encoding="utf-8") == '{"kind":"A"}\n{"kind":"B"}\n'
diff --git a/tests/unit/test_real_world_identifier_layouts.py b/tests/unit/test_real_world_identifier_layouts.py
new file mode 100644
index 0000000..1c27043
--- /dev/null
+++ b/tests/unit/test_real_world_identifier_layouts.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""
+Tests de non-régression sur des layouts d'identifiants vus en documents réels.
+"""
+from anonymizer_core_refactored_onnx import (
+    RE_SCAN_FILENAME_ARTIFACT,
+    anonymise_document_regex,
+    load_dictionaries,
+)
+
+
+def test_bacterio_multiline_venue_number_before_ipp_is_masked():
+    """The number 23176885 preceding the "IPP :" label is masked as [NDA]."""
+    page = (
+        "Diffusé le :\n"
+        "à\n"
+        "N° venue :\n"
+        "31/07/1973\n"
+        "VAN DE GRAAF\n"
+        "23176885\n"
+        "IPP :\n"
+        "2300201230\n"
+    )
+    result = anonymise_document_regex([page], [[]], load_dictionaries(None))
+
+    assert "23176885" not in result.text_out
+    assert "[NDA]" in result.text_out
+    # The audit trail must record the original value under the NDA kind.
+    assert any(hit.original == "23176885" for hit in result.audit if hit.kind == "NDA")
+
+
+def test_scan_filename_artifact_suffix_is_masked():
+    """Both numbers in the scan file name EXT2-<n>-<n>.TIF end up masked."""
+    page = (
+        "IPP:\n"
+        "16014215\n"
+        "Document scanné non\n"
+        "éditable pour patient (dont\ngénétique)\n"
+        "EXT2-16014215-2300249096.TIF\n"
+    )
+    result = anonymise_document_regex([page], [[]], load_dictionaries(None))
+
+    # The artifact pattern must match a name whose IPP part is already replaced.
+    assert RE_SCAN_FILENAME_ARTIFACT.search("EXT2-[IPP]-2300249096.TIF") is not None
+    assert "2300249096" not in result.text_out
+    assert "EXT2-[IPP]-[DOSSIER].TIF" in result.text_out