#!/usr/bin/env python3
"""Copy the selected documents into tests/ground_truth/pdfs.

The script reads a JSON list of document descriptors, copies every source
file that exists into the destination directory under a unique, descriptive
name, prints a summary, and writes a ``mapping.json`` file that links each
destination file name back to its original descriptor.
"""

import json
import shutil
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple

# Locations used when the script is run without arguments; both are relative
# to the current working directory.
DEFAULT_SELECTED_FILE = Path("tests/ground_truth/selected_documents.json")
DEFAULT_DEST_DIR = Path("tests/ground_truth/pdfs")


def _dest_name(index: int, doc: Dict[str, Any]) -> str:
    """Return the destination file name for *doc*.

    The format is ``{index:03d}_{complexity}_{type}_{filename}``. Spaces in
    the original file name are replaced by underscores, and a missing
    ``type`` or ``complexity`` falls back to ``"unknown"``.

    Raises:
        KeyError: if *doc* has no ``filename`` entry.
    """
    doc_type = doc.get('type', 'unknown')
    complexity = doc.get('complexity', 'unknown')
    safe_name = doc['filename'].replace(' ', '_')
    return f"{index:03d}_{complexity}_{doc_type}_{safe_name}"


def _copy_documents(
    documents: List[Dict[str, Any]], dest_dir: Path
) -> Tuple[int, List[str]]:
    """Copy each document into *dest_dir*; return (copied count, error messages).

    A missing source file or a failed copy is printed and recorded, and the
    loop moves on to the next document instead of aborting.
    """
    copied = 0
    errors: List[str] = []

    for index, doc in enumerate(documents, 1):
        src_path = Path(doc['path'])
        dest_name = _dest_name(index, doc)

        try:
            if not src_path.exists():
                failure = f"✗ Fichier introuvable: {src_path}"
            else:
                shutil.copy2(src_path, dest_dir / dest_name)
                failure = None
        except OSError as exc:
            # Only filesystem-level failures are treated as per-document
            # errors; anything else is a programming error and propagates.
            failure = f"✗ Erreur lors de la copie de {src_path}: {exc}"

        if failure is None:
            print(f"✓ Copié: {dest_name}")
            copied += 1
        else:
            print(failure)
            errors.append(failure)

    return copied, errors


def _print_summary(
    copied: int, total: int, errors: List[str], dest_dir: Path
) -> None:
    """Print the copy statistics followed by the list of recorded errors."""
    print(f"\n{'=' * 60}")
    print("Résumé:")
    print(f" Documents copiés: {copied}/{total}")
    print(f" Erreurs: {len(errors)}")
    print(f" Destination: {dest_dir.absolute()}")

    if errors:
        print("\nErreurs rencontrées:")
        for error in errors:
            print(f" {error}")


def _build_mapping(documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return one mapping entry per document, in input order.

    Every document gets an entry, including those whose copy failed, so the
    ``id`` values always match the numeric prefix of ``dest_filename``.

    Raises:
        KeyError: if a document lacks ``path``, ``folder`` or ``filename``.
    """
    return [
        {
            "id": index,
            "dest_filename": _dest_name(index, doc),
            "original_path": doc['path'],
            "folder": doc['folder'],
            "original_filename": doc['filename'],
            "type": doc.get('type', 'unknown'),
            "complexity": doc.get('complexity', 'unknown'),
            "pages": doc.get('pages', 0),
            "size_mb": doc.get('size_mb', 0),
        }
        for index, doc in enumerate(documents, 1)
    ]


def copy_selected_documents(
    selected_file: Path = DEFAULT_SELECTED_FILE,
    dest_dir: Path = DEFAULT_DEST_DIR,
) -> Tuple[int, int]:
    """Copy the selected documents into the test directory.

    Args:
        selected_file: JSON file holding a list of document descriptors.
            Each descriptor must provide ``path``, ``filename`` and
            ``folder``; ``type``, ``complexity``, ``pages`` and ``size_mb``
            are optional.
        dest_dir: Directory that receives the copies and ``mapping.json``.
            It is created, together with missing parents, if necessary.

    Returns:
        A ``(copied, error_count)`` tuple.

    Raises:
        OSError: if *selected_file* cannot be read or the mapping file
            cannot be written.
        json.JSONDecodeError: if *selected_file* is not valid JSON.
        KeyError: if a descriptor lacks a required key.
    """
    selected_file = Path(selected_file)
    dest_dir = Path(dest_dir)

    with open(selected_file, 'r', encoding='utf-8') as f:
        documents = json.load(f)

    dest_dir.mkdir(parents=True, exist_ok=True)

    copied, errors = _copy_documents(documents, dest_dir)
    _print_summary(copied, len(documents), errors, dest_dir)

    # The mapping is built after the summary so that a descriptor missing
    # "folder" does not prevent the copies and the summary from happening.
    mapping_file = dest_dir / "mapping.json"
    with open(mapping_file, 'w', encoding='utf-8') as f:
        json.dump(_build_mapping(documents), f, indent=2, ensure_ascii=False)

    print(f"\nFichier de mapping créé: {mapping_file}")

    return copied, len(errors)


if __name__ == "__main__":
    _, error_count = copy_selected_documents()
    # sys.exit is used instead of the `exit` helper, which is only injected
    # by the `site` module and is not guaranteed to exist.
    sys.exit(0 if error_count == 0 else 1)