#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GLiNER Manager — NER zero-shot pour validation croisée des entités.
-------------------------------------------------------------------
Utilise GLiNER (< 500M params, CPU) comme 3e signal NER en vote majoritaire
avec CamemBERT ONNX + EDS-Pseudo. Réduit les faux positifs : une entité
flaggée par 1 seul modèle sur 3 est supprimée.

Modèle : urchade/gliner_multi_pii-v1 (1.1 GB, ~95ms/inférence CPU)
Version compatible : gliner==0.2.18 (pas plus récent, casse optimum-onnx)
"""
from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# `gliner` is treated as an optional dependency: if the import fails the module
# still imports, `GLiNER` is bound to None and `_GLINER_AVAILABLE` records the
# failure so that GlinerManager.load() can raise an explicit error instead.
try:
    from gliner import GLiNER
    _GLINER_AVAILABLE = True
except ImportError:
    GLiNER = None  # type: ignore
    _GLINER_AVAILABLE = False

# Zero-shot labels for PII detection in French clinical text.
GLINER_PII_LABELS = [
    "person_name",
    "date_of_birth",
    "phone_number",
    "email_address",
    "social_security_number",
    "postal_address",
    "hospital",
    "city",
]

# Labels identifying medical terms (anti-PII: a span classified under one of
# these is treated as "not a name").
GLINER_SAFE_LABELS = [
    "medication",
    "medical_condition",
    "medical_procedure",
]

# Mapping from GLiNER label → PLACEHOLDERS key. Every entry of
# GLINER_PII_LABELS has a key here.
# NOTE(review): PLACEHOLDERS is not defined in this file — confirm these keys
# against wherever it is declared.
GLINER_LABEL_MAP: Dict[str, str] = {
    "person_name": "NOM",
    "date_of_birth": "DATE_NAISSANCE",
    "phone_number": "TEL",
    "email_address": "EMAIL",
    "social_security_number": "NIR",
    "postal_address": "ADRESSE",
    "hospital": "ETAB",
    "city": "VILLE",
}

# Model identifier used by GlinerManager.load() when the caller gives none.
DEFAULT_MODEL = "urchade/gliner_multi_pii-v1"


class GlinerManager:
    """Manage a GLiNER model used for zero-shot NER in a majority vote.

    A model must be loaded explicitly with ``load()``. While nothing is loaded
    (before ``load()`` or after ``unload()``) the query methods degrade
    gracefully: ``predict`` returns ``[]``, ``is_pii`` returns ``None`` and
    ``validate_entities`` returns its input untouched.
    """

    def __init__(self):
        self._model = None
        self._loaded = False
        # Identifier of the currently loaded model; None when nothing is loaded.
        self.model_id: Optional[str] = None

    def is_loaded(self) -> bool:
        """Return True when a model instance is loaded and usable."""
        return self._loaded and self._model is not None

    def load(self, model_id: str = DEFAULT_MODEL) -> None:
        """Load ``model_id`` with ``GLiNER.from_pretrained``, replacing any current model.

        Args:
            model_id: Identifier passed to ``GLiNER.from_pretrained``.

        Raises:
            RuntimeError: If the ``gliner`` package could not be imported.
            Exception: Anything raised by ``GLiNER.from_pretrained`` propagates;
                the manager is then left fully unloaded (``model_id`` is None).
        """
        if not _GLINER_AVAILABLE:
            raise RuntimeError("gliner non disponible. Installez : pip install 'gliner==0.2.18'")
        # Drop the reference to the previous model before building the new one.
        self.unload()
        model = GLiNER.from_pretrained(model_id)
        # Publish the new state only after loading succeeded, so a failed load
        # never leaves `model_id` naming a model that is not actually loaded.
        self._model = model
        self.model_id = model_id
        self._loaded = True
        log.info("GLiNER chargé: %s", model_id)

    def unload(self) -> None:
        """Forget the current model and reset the manager to its initial state."""
        self._model = None
        self._loaded = False
        self.model_id = None

    def predict(
        self,
        text: str,
        labels: Optional[List[str]] = None,
        threshold: float = 0.5,
    ) -> List[Dict[str, Any]]:
        """Predict entities in ``text``.

        Args:
            text: Text to analyse. An empty text yields ``[]`` without calling
                the model.
            labels: Zero-shot labels to query; defaults to
                ``GLINER_PII_LABELS + GLINER_SAFE_LABELS``.
            threshold: Forwarded as ``threshold`` to ``predict_entities``.

        Returns:
            A list of dicts with the keys ``text``, ``label``, ``score``,
            ``start`` and ``end``. Returns ``[]`` when no model is loaded or
            when inference fails (the failure is logged as a warning).
        """
        if not self.is_loaded() or not text:
            return []
        if labels is None:
            labels = GLINER_PII_LABELS + GLINER_SAFE_LABELS
        try:
            raw_entities = self._model.predict_entities(text, labels, threshold=threshold)
            # Keep only the documented keys, whatever else the model returns.
            return [
                {
                    "text": ent["text"],
                    "label": ent["label"],
                    "score": ent["score"],
                    "start": ent["start"],
                    "end": ent["end"],
                }
                for ent in raw_entities
            ]
        except Exception as exc:
            # Deliberately best-effort: a failing inference must not break the
            # caller, it simply contributes no prediction.
            log.warning("GLiNER predict error: %s", exc)
            return []

    def is_pii(self, text: str, entity_text: str, threshold: float = 0.5) -> Optional[str]:
        """Check whether ``entity_text`` is detected as PII in ``text``.

        The comparison against GLiNER's entity text is exact after stripping
        whitespace and lower-casing both sides. The first matching prediction
        whose label is known decides the outcome.

        Returns:
            The mapped PLACEHOLDERS key when the match carries a PII label;
            ``None`` when it carries a medical ("safe") label, when nothing
            matches, when ``entity_text`` is empty, or when no model is loaded.
        """
        if not self.is_loaded() or not entity_text:
            return None
        target = entity_text.strip().lower()
        for ent in self.predict(text, threshold=threshold):
            if ent["text"].strip().lower() != target:
                continue
            label = ent["label"]
            if label in GLINER_LABEL_MAP:
                return GLINER_LABEL_MAP[label]
            if label in GLINER_SAFE_LABELS:
                return None  # explicitly classified as a medical term
        return None  # not found → no vote

    def validate_entities(
        self,
        text: str,
        eds_entities: List[Dict[str, Any]],
        threshold: float = 0.4,
    ) -> List[Dict[str, Any]]:
        """Cross-validate EDS-Pseudo entities against GLiNER predictions.

        Each entity dict is updated in place with a ``gliner_confirmed`` field:

        - ``True``: GLiNER also detects this span as PII.
        - ``False``: GLiNER classifies this span as a medical term
          (medication / condition / procedure). This takes precedence over any
          PII match for the same entity.
        - ``None``: GLiNER detects nothing for this span (neutral).

        A GLiNER prediction is matched to an entity when their character spans
        overlap (half-open ``[start, end)`` intervals), or when one lower-cased
        text contains the other. Entities without a usable span are matched by
        text only; entities without a ``word`` are matched by span only.

        Args:
            text: Full text the entities were extracted from.
            eds_entities: Entity dicts; the keys ``start``, ``end`` and ``word``
                are read when present.
            threshold: Score threshold forwarded to ``predict``.

        Returns:
            The same ``eds_entities`` list. It is returned unchanged (no field
            added) when no model is loaded or the list is empty.
        """
        if not self.is_loaded() or not eds_entities:
            return eds_entities

        # Single GLiNER pass over the whole text, PII and medical labels together.
        gliner_preds = self.predict(
            text,
            labels=GLINER_PII_LABELS + GLINER_SAFE_LABELS,
            threshold=threshold,
        )
        # Lower-case each predicted text once instead of once per entity.
        candidates = [
            (pred["text"].lower(), pred["start"], pred["end"], pred["label"])
            for pred in gliner_preds
        ]

        for entity in eds_entities:
            e_start = entity.get("start")
            e_end = entity.get("end")
            # Missing, None or degenerate offsets cannot take part in span matching.
            has_span = (
                e_start is not None
                and e_end is not None
                and 0 <= e_start < e_end
            )
            e_word = (entity.get("word") or "").lower()

            confirmed = None  # default: neutral
            for g_text, g_start, g_end, g_label in candidates:
                overlap = has_span and e_start < g_end and g_start < e_end
                # Both sides must be non-empty: "" is a substring of every
                # string and would otherwise match every prediction.
                text_match = (
                    bool(e_word)
                    and bool(g_text)
                    and (e_word in g_text or g_text in e_word)
                )
                if not (overlap or text_match):
                    continue
                if g_label in GLINER_SAFE_LABELS:
                    confirmed = False  # GLiNER says: medical term, not PII
                    break
                if g_label in GLINER_LABEL_MAP:
                    confirmed = True  # GLiNER agrees: PII

            entity["gliner_confirmed"] = confirmed

        return eds_entities