#!/usr/bin/env python3 """Fusionne les fichiers de paramètres envoyés par les établissements. Usage : python scripts/merge_params.py fichier1.json [fichier2.json ...] python scripts/merge_params.py --dir /chemin/vers/exports/ Fusionne les whitelist_phrases et blacklist_force_mask_terms de chaque fichier JSON exporté par la GUI dans la config maîtresse (dictionnaires.yml). """ import argparse import json import sys from pathlib import Path try: import yaml except ImportError: print("ERREUR : pyyaml requis (pip install pyyaml)") sys.exit(1) CONFIG = Path(__file__).parent.parent / "config" / "dictionnaires.yml" def merge_params(json_files: list, config_path: Path = CONFIG, dry_run: bool = False): """Fusionne les paramètres des fichiers JSON dans la config YAML.""" if not config_path.exists(): print(f"ERREUR : config introuvable : {config_path}") return cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} # Charger les listes existantes existing_wl = set(cfg.get("whitelist_phrases", [])) existing_bl = set(cfg.get("blacklist", {}).get("force_mask_terms", [])) added_wl = set() added_bl = set() sources = [] for jf in json_files: try: data = json.loads(Path(jf).read_text(encoding="utf-8")) src = f"{Path(jf).name} (v{data.get('version', '?')}, {data.get('date_export', '?')[:10]})" sources.append(src) for phrase in data.get("whitelist_phrases", []): if phrase and phrase.strip() and phrase.strip() not in existing_wl: added_wl.add(phrase.strip()) for term in data.get("blacklist_force_mask_terms", []): if term and str(term).strip() and str(term).strip() not in existing_bl: added_bl.add(str(term).strip()) except Exception as e: print(f" ERREUR lecture {jf}: {e}") print(f"\nSources traitées : {len(sources)}") for s in sources: print(f" - {s}") print(f"\nNouvelles phrases whitelist : {len(added_wl)}") for p in sorted(added_wl): print(f" + {p}") print(f"\nNouveaux termes blacklist : {len(added_bl)}") for t in sorted(added_bl): print(f" + {t}") if not added_wl and not added_bl: print("\nRien de nouveau à fusionner.") return if dry_run: print("\n(dry-run — aucune modification)") return # Appliquer cfg["whitelist_phrases"] = sorted(existing_wl | added_wl) if "blacklist" not in cfg: cfg["blacklist"] = {} cfg["blacklist"]["force_mask_terms"] = sorted(existing_bl | added_bl) config_path.write_text( yaml.dump(cfg, allow_unicode=True, default_flow_style=False, sort_keys=False), encoding="utf-8", ) print(f"\nConfig mise à jour : {config_path}") print(f" Whitelist : {len(cfg['whitelist_phrases'])} phrases") print(f" Blacklist : {len(cfg['blacklist']['force_mask_terms'])} termes") def main(): parser = argparse.ArgumentParser(description="Fusionner les paramètres d'anonymisation") parser.add_argument("files", nargs="*", help="Fichiers JSON à fusionner") parser.add_argument("--dir", type=Path, help="Dossier contenant les fichiers JSON") parser.add_argument("--config", type=Path, default=CONFIG, help="Config YAML cible") parser.add_argument("--dry-run", action="store_true", help="Afficher sans modifier") args = parser.parse_args() json_files = list(args.files) if args.dir and args.dir.is_dir(): json_files.extend(str(f) for f in args.dir.glob("*.json")) if not json_files: print("Aucun fichier JSON spécifié. Usage :") print(" python scripts/merge_params.py export1.json export2.json") print(" python scripts/merge_params.py --dir /chemin/exports/") return merge_params(json_files, config_path=args.config, dry_run=args.dry_run) if __name__ == "__main__": main()