"""Tests for the batch path helpers imported from ``gui_batch_paths``."""

from pathlib import Path

from gui_batch_paths import (
    build_batch_output_dir,
    iter_pseudonymized_texts,
    list_supported_documents,
)


def test_list_supported_documents_excludes_gui_output_tree(tmp_path: Path):
    """Files under the root's ``anonymise`` subtree must not be listed.

    Two source case folders and one ``anonymise`` folder are populated; only
    the two source files are expected back, in exactly this order.
    """
    cases_dir = tmp_path / "cases"
    header_dir = cases_dir / "001_patient_header_and_birth"
    bundle_dir = cases_dir / "002_contact_bundle"
    generated_dir = cases_dir / "anonymise" / "001_patient_header_and_birth"
    for directory in (header_dir, bundle_dir, generated_dir):
        directory.mkdir(parents=True)

    text_input = header_dir / "test.txt"
    pdf_input = bundle_dir / "source.pdf"
    generated_text = generated_dir / "test.pseudonymise.txt"
    fixtures = (
        (text_input, "source"),
        (pdf_input, "pdf"),
        (generated_text, "output"),
    )
    for target, payload in fixtures:
        target.write_text(payload, encoding="utf-8")

    listed = list_supported_documents(cases_dir, {".txt", ".pdf"})

    # List equality: both the membership and the order are checked.
    assert listed == [text_input, pdf_input]


def test_build_batch_output_dir_preserves_relative_parent(tmp_path: Path):
    """A file inside a case folder maps to the same folder name under the output root."""
    case_name = "010_spaced_establishment_header"
    cases_dir = tmp_path / "cases"
    destination_root = cases_dir / "anonymise"

    document = cases_dir / case_name / "test.txt"
    document.parent.mkdir(parents=True)
    document.write_text("test", encoding="utf-8")

    resolved = build_batch_output_dir(cases_dir, destination_root, document)

    assert resolved == destination_root / case_name


def test_build_batch_output_dir_keeps_root_files_at_output_root(tmp_path: Path):
    """A file sitting directly in the root maps to the output root itself."""
    cases_dir = tmp_path / "cases"
    cases_dir.mkdir(parents=True)
    destination_root = cases_dir / "anonymise"

    document = cases_dir / "test.txt"
    document.write_text("test", encoding="utf-8")

    resolved = build_batch_output_dir(cases_dir, destination_root, document)

    assert resolved == destination_root


def test_iter_pseudonymized_texts_is_recursive(tmp_path: Path):
    """Pseudonymised texts are found at the top level and in subfolders.

    A ``.jsonl`` file placed next to the nested text must not be yielded.
    """
    anonymised_root = tmp_path / "anonymise"
    case_dir = anonymised_root / "001_patient_header_and_birth"
    case_dir.mkdir(parents=True)

    root_level_text = anonymised_root / "summary.pseudonymise.txt"
    case_level_text = case_dir / "test.pseudonymise.txt"
    audit_log = case_dir / "audit.jsonl"
    fixtures = (
        (root_level_text, "top"),
        (case_level_text, "nested"),
        (audit_log, "{}"),
    )
    for target, payload in fixtures:
        target.write_text(payload, encoding="utf-8")

    # Sort so the comparison does not depend on traversal order.
    discovered = list(iter_pseudonymized_texts(anonymised_root))
    discovered.sort()

    assert discovered == [case_level_text, root_level_text]