v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
399
core/execution/screen_signature.py
Normal file
399
core/execution/screen_signature.py
Normal file
@@ -0,0 +1,399 @@
|
||||
"""
|
||||
Screen Signature - Génération de signatures d'écran pour apprentissage persistant
|
||||
|
||||
Fiche #18 - Utilitaire pour générer des signatures stables d'écrans
|
||||
permettant de reconnaître des layouts similaires entre sessions.
|
||||
|
||||
Auteur: Dom, Alice Kiro - 22 décembre 2025
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from typing import List, Optional, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class LayoutElement:
    """Simplified UI element used when building a layout signature."""

    # Role name of the element.
    role: str
    # Bounding box as an (x, y, w, h) tuple.
    bbox: tuple
    # Surface of the element, as supplied by whoever builds the instance.
    area: float
    # Number of characters of text attached to the element (0 by default).
    text_length: int = 0
|
||||
|
||||
|
||||
def screen_signature(
    screen_state,
    ui_elements: List,
    mode: str = "layout"
) -> str:
    """
    Build a stable signature for a screen.

    Supported modes:
    - "layout": based on how the UI elements are arranged
    - "content": based on textual content and element roles
    - "hybrid": MD5 of the layout and content signatures joined by "|"

    Args:
        screen_state: Current ScreenState
        ui_elements: Detected UI elements
        mode: Signature mode ("layout", "content" or "hybrid")

    Returns:
        Hexadecimal MD5 digest

    Raises:
        ValueError: If ``mode`` is not one of the supported values
    """
    # Reject unsupported modes up front so the branches below stay flat.
    if mode not in ("layout", "content", "hybrid"):
        raise ValueError(f"Unknown signature mode: {mode}")

    if mode == "layout":
        return _layout_signature(screen_state, ui_elements)

    if mode == "content":
        return _content_signature(screen_state, ui_elements)

    # Hybrid: hash the two partial signatures together (layout first).
    layout_part = _layout_signature(screen_state, ui_elements)
    content_part = _content_signature(screen_state, ui_elements)
    merged = f"{layout_part}|{content_part}"
    return hashlib.md5(merged.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def _layout_signature(screen_state, ui_elements: List) -> str:
    """
    Build a signature from the arrangement of the UI elements.

    The signature string that gets hashed is made of three parts:
    - the number of elements per (lower-cased) role
    - a 4x4 occupancy grid computed from element centres
    - the role, grid position and size class of the five largest elements

    Coordinates are normalised by the screen resolution read from
    ``screen_state.window.screen_resolution``. When that value is missing,
    is not indexable, or holds a non-positive dimension, 1920x1080 is used.

    Args:
        screen_state: Current ScreenState (only the resolution is read here)
        ui_elements: Detected UI elements; elements without a ``bbox``
            attribute, or whose processing raises, are skipped

    Returns:
        Hexadecimal MD5 digest. Fixed digests are returned when
        ``ui_elements`` is empty or when no element could be processed.
    """
    if not ui_elements:
        return hashlib.md5(b"empty_layout").hexdigest()

    # Resolution used for normalisation, starting from the fallback value.
    screen_width, screen_height = 1920, 1080
    try:
        resolution = screen_state.window.screen_resolution
        candidate_w, candidate_h = resolution[0], resolution[1]
        # A zero or negative dimension would make every division below fail
        # (or flip signs), so only accept strictly positive values.
        if candidate_w > 0 and candidate_h > 0:
            screen_width, screen_height = candidate_w, candidate_h
    except (AttributeError, IndexError, KeyError, TypeError):
        # Missing attribute, None resolution, too-short sequence or
        # non-numeric entries: keep the fallback.
        pass

    # Convert the elements to the simplified representation.
    layout_elements = []

    for elem in ui_elements:
        try:
            if not hasattr(elem, 'bbox'):
                continue  # No bounding box: nothing to place on the layout

            # Bounding box in XYWH form, either as an object exposing
            # to_tuple() or as a plain 4-item iterable.
            bbox = elem.bbox
            if hasattr(bbox, 'to_tuple'):
                x, y, w, h = bbox.to_tuple()
            else:
                x, y, w, h = bbox

            # Normalise by the screen size.
            norm_x = x / screen_width
            norm_y = y / screen_height
            norm_w = w / screen_width
            norm_h = h / screen_height

            role = getattr(elem, 'role', '') or getattr(elem, 'type', '') or 'unknown'
            label = getattr(elem, 'label', '') or ''

            layout_elements.append(LayoutElement(
                role=role.lower(),
                bbox=(norm_x, norm_y, norm_w, norm_h),
                area=norm_w * norm_h,
                text_length=len(label.strip()),
            ))

        except Exception as e:
            # Deliberate best-effort: one malformed element must not
            # prevent a signature from being produced.
            logger.debug("Error processing element for layout signature: %s", e)
            continue

    if not layout_elements:
        return hashlib.md5(b"no_valid_elements").hexdigest()

    # Order top-to-bottom then left-to-right so the input order matters less.
    layout_elements.sort(key=lambda e: (e.bbox[1], e.bbox[0]))

    signature_parts = []

    # 1. Number of elements per role.
    role_counts = {}
    for elem in layout_elements:
        role_counts[elem.role] = role_counts.get(elem.role, 0) + 1

    signature_parts.append(f"roles:{','.join(f'{r}:{c}' for r, c in sorted(role_counts.items()))}")

    # 2. Coarse 4x4 occupancy grid.
    grid_signature = _compute_grid_signature(layout_elements)
    signature_parts.append(f"grid:{grid_signature}")

    # 3. Dominant elements (the five largest by normalised area).
    dominant_elements = sorted(layout_elements, key=lambda e: e.area, reverse=True)[:5]
    dominant_sig = []
    for elem in dominant_elements:
        x, y, w, h = elem.bbox
        # Quarter-of-screen index of the top-left corner. Not clamped, so an
        # origin outside the screen yields an index outside 0-3.
        grid_x = int(x * 4)
        grid_y = int(y * 4)
        size_class = "L" if elem.area > 0.1 else "M" if elem.area > 0.01 else "S"
        dominant_sig.append(f"{elem.role}@{grid_x},{grid_y}:{size_class}")

    signature_parts.append(f"dominant:{','.join(dominant_sig)}")

    # Combine and hash.
    signature_string = "|".join(signature_parts)
    return hashlib.md5(signature_string.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def _content_signature(screen_state, ui_elements: List) -> str:
    """
    Build a signature from textual content and element roles.

    The hashed string is assembled, in this order, from:
    - the normalised window title
    - up to 10 normalised texts (element labels plus OCR texts), sorted
    - up to 8 "role:text" pairs, sorted
    - the sorted keywords returned by ``_extract_keywords``

    Args:
        screen_state: Current ScreenState; ``window.window_title`` and
            ``perception.detected_text`` are read when available
        ui_elements: Detected UI elements

    Returns:
        Hexadecimal MD5 digest; a fixed digest when nothing was collected.
    """
    parts = []

    # 1. Window title, normalised.
    try:
        title = _normalize_text_for_signature(screen_state.window.window_title or "")
        if title:
            parts.append(f"title:{title}")
    except AttributeError:
        pass

    # 2. Labels carried by the UI elements.
    texts = []
    labelled_roles = []

    for element in ui_elements:
        try:
            raw_label = getattr(element, 'label', '') or ''
            if not raw_label or len(raw_label.strip()) == 0:
                continue

            cleaned = _normalize_text_for_signature(raw_label)
            if not cleaned:
                continue

            texts.append(cleaned)

            # Pair the text with the role of the element that carries it.
            element_role = getattr(element, 'role', '') or 'unknown'
            labelled_roles.append(f"{element_role}:{cleaned}")
        except Exception:
            continue

    # 3. Texts reported by the perception layer (strings longer than 2 chars).
    try:
        for ocr_text in screen_state.perception.detected_text or []:
            if not isinstance(ocr_text, str) or len(ocr_text.strip()) <= 2:
                continue
            cleaned = _normalize_text_for_signature(ocr_text)
            if cleaned:
                texts.append(cleaned)
    except AttributeError:
        pass

    # Sorted, capped lists keep the signature independent of detection order.
    if texts:
        parts.append(f"texts:{','.join(sorted(texts)[:10])}")

    if labelled_roles:
        parts.append(f"role_texts:{','.join(sorted(labelled_roles)[:8])}")

    # 4. Keywords taken from the UI elements.
    keywords = _extract_keywords(ui_elements)
    if keywords:
        parts.append(f"keywords:{','.join(sorted(keywords))}")

    if not parts:
        return hashlib.md5(b"no_content").hexdigest()

    return hashlib.md5("|".join(parts).encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def _compute_grid_signature(layout_elements: List["LayoutElement"]) -> str:
    """
    Compute a 4x4 grid signature.

    The unit square is split into 16 cells and each element is counted in
    the cell that contains the centre of its ``bbox`` (x, y, w, h). Centres
    that fall outside the unit square are counted in the nearest edge cell.

    Args:
        layout_elements: Elements exposing a 4-item ``bbox``

    Returns:
        A 16-character string, row by row, one digit per cell; counts
        above 9 are written as 9.
    """
    grid = [[0] * 4 for _ in range(4)]

    for elem in layout_elements:
        x, y, w, h = elem.bbox

        # Centre of the element.
        center_x = x + w / 2
        center_y = y + h / 2

        # Clamp on both sides: without the lower bound a negative index
        # would silently wrap to the opposite edge or raise IndexError.
        col = max(0, min(3, int(center_x * 4)))
        row = max(0, min(3, int(center_y * 4)))

        grid[row][col] += 1

    # One digit per cell, capped at 9 so the string stays 16 characters long.
    return "".join(str(min(9, count)) for cells in grid for count in cells)
|
||||
|
||||
|
||||
def _normalize_text_for_signature(text: str) -> str:
    """
    Normalise a text so that it contributes a stable signature fragment.

    The text is lower-cased, then stripped of times, dates, version
    numbers, standalone numbers and every character other than ``a-z``,
    whitespace, ``-`` and ``_``. Words shorter than three characters and a
    small set of English stop words are dropped, and the result is cut to
    50 characters.

    Args:
        text: Raw text; a falsy value yields an empty string

    Returns:
        The normalised text (possibly empty)
    """
    if not text:
        return ""

    import re

    cleaned = text.lower().strip()

    # Applied in this exact order; later patterns rely on earlier removals.
    substitutions = (
        (r'\d{1,2}:\d{2}(:\d{2})?', ''),   # HH:MM or HH:MM:SS
        (r'\d{1,2}/\d{1,2}/\d{2,4}', ''),  # slash-separated dates
        (r'\d{4}-\d{2}-\d{2}', ''),        # ISO dates
        (r'v?\d+\.\d+(\.\d+)?', ''),       # version numbers
        (r'\b\d+\b', ''),                  # standalone numbers
        (r'\s+', ' '),                     # collapse whitespace runs
        (r'[^a-z\s\-_]', ''),              # keep letters, spaces, '-' and '_'
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    stop_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}

    # Drop very short words and stop words.
    kept = [word for word in cleaned.split() if len(word) >= 3 and word not in stop_words]

    # Cap the length at 50 characters.
    return ' '.join(kept).strip()[:50]
|
||||
|
||||
|
||||
def _extract_keywords(ui_elements: List) -> List[str]:
    """
    Extract keywords from the UI elements.

    Only elements whose role (case-insensitive) is one of button, link,
    heading, label, tab or menuitem and that carry a label are considered.
    The keyword is the first word of the normalised label, kept when it is
    at least three characters long.

    Args:
        ui_elements: Detected UI elements

    Returns:
        Distinct keywords in sorted order, so the result does not depend on
        set iteration order (which varies between interpreter runs).
    """
    important_roles = {'button', 'link', 'heading', 'label', 'tab', 'menuitem'}
    keywords = set()

    for elem in ui_elements:
        try:
            role = getattr(elem, 'role', '') or ''
            label = getattr(elem, 'label', '') or ''

            if role.lower() not in important_roles or not label:
                continue

            # First significant word of the normalised label.
            words = _normalize_text_for_signature(label).split()
            if words and len(words[0]) >= 3:
                keywords.add(words[0])
        except Exception:
            # Deliberate best-effort: an element with unexpected attribute
            # types is skipped rather than aborting the extraction.
            continue

    return sorted(keywords)
|
||||
|
||||
|
||||
def compare_signatures(sig1: str, sig2: str) -> float:
    """
    Compare two signatures and return a similarity score.

    Args:
        sig1: First signature
        sig2: Second signature

    Returns:
        1.0 when the signatures are equal, 0.0 otherwise
    """
    # Only exact equality is checked here; a partial similarity would need
    # the signature components as they were before hashing.
    return 1.0 if sig1 == sig2 else 0.0
|
||||
|
||||
|
||||
def signature_stats(signatures: List[str]) -> Dict[str, Any]:
    """
    Compute statistics over a collection of signatures.

    Args:
        signatures: List of signatures

    Returns:
        Dictionary with the keys:
        - "total": number of signatures
        - "unique": number of distinct signatures
        - "duplicates": total minus unique
        - "uniqueness_ratio": unique / total, or 0.0 for an empty input
    """
    total = len(signatures) if signatures else 0
    if total == 0:
        # Same keys as the non-empty case so callers can read
        # "uniqueness_ratio" without special-casing an empty input.
        return {"total": 0, "unique": 0, "duplicates": 0, "uniqueness_ratio": 0.0}

    unique = len(set(signatures))

    return {
        "total": total,
        "unique": unique,
        "duplicates": total - unique,
        "uniqueness_ratio": unique / total,
    }
|
||||
Reference in New Issue
Block a user