Files
rpa_vision_v3/core/detection/spatial_analyzer.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

596 lines
19 KiB
Python

"""
SpatialAnalyzer - Analyse des relations spatiales entre éléments UI
Ce module analyse:
- Relations spatiales (above, below, left_of, right_of, inside)
- Conteneurs sémantiques (forms, menus, toolbars, dialogs)
- Groupement d'éléments liés
"""
import logging
from typing import List, Dict, Optional, Any, Tuple, Set
from dataclasses import dataclass, field
from enum import Enum
import numpy as np
logger = logging.getLogger(__name__)
# =============================================================================
# Enums et Dataclasses
# =============================================================================
class RelationType(Enum):
    """Kinds of spatial relation that can hold between two UI elements."""

    # Directional relations (each one has an opposite).
    ABOVE = "above"
    BELOW = "below"
    LEFT_OF = "left_of"
    RIGHT_OF = "right_of"

    # Containment relations (opposites of each other).
    INSIDE = "inside"
    CONTAINS = "contains"

    # Symmetric relations.
    OVERLAPS = "overlaps"
    ADJACENT = "adjacent"
class ContainerType(Enum):
    """Kinds of semantic container a group of UI elements can form."""

    FORM = "form"            # input-oriented group
    MENU = "menu"            # menu items
    TOOLBAR = "toolbar"      # row of actions
    DIALOG = "dialog"        # small group with buttons
    LIST = "list"            # stacked items
    TABLE = "table"
    PANEL = "panel"          # generic fallback grouping
    TAB_GROUP = "tab_group"
@dataclass
class SpatialRelation:
    """Directed spatial relation from a source element to a target element."""

    source_element_id: str
    target_element_id: str
    relation_type: RelationType
    distance: float  # Distance in pixels
    confidence: float  # Confidence of the relation (0-1)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the relation (enum flattened to its value)."""
        return dict(
            source=self.source_element_id,
            target=self.target_element_id,
            relation=self.relation_type.value,
            distance=self.distance,
            confidence=self.confidence,
        )

    @property
    def inverse(self) -> 'SpatialRelation':
        """Return the same relation seen from the target's point of view.

        Source and target are swapped and the relation type is replaced by
        its opposite; distance and confidence are carried over unchanged.
        """
        opposite_pairs = (
            (RelationType.ABOVE, RelationType.BELOW),
            (RelationType.LEFT_OF, RelationType.RIGHT_OF),
            (RelationType.INSIDE, RelationType.CONTAINS),
        )
        self_inverse = (RelationType.OVERLAPS, RelationType.ADJACENT)

        flipped: Dict[RelationType, RelationType] = {}
        for first, second in opposite_pairs:
            flipped[first] = second
            flipped[second] = first
        for symmetric in self_inverse:
            flipped[symmetric] = symmetric

        return SpatialRelation(
            self.target_element_id,
            self.source_element_id,
            flipped[self.relation_type],
            self.distance,
            self.confidence,
        )
@dataclass
class SemanticContainer:
    """Semantic container: a typed group of element IDs with a bounding box."""

    container_id: str
    container_type: ContainerType
    element_ids: List[str]
    bounds: Tuple[int, int, int, int]  # (x, y, width, height)
    confidence: float
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the container (enum flattened to its value)."""
        payload: Dict[str, Any] = {
            "container_id": self.container_id,
            "container_type": self.container_type.value,
        }
        # Remaining fields are exported as-is, in declaration order.
        for name in ("element_ids", "bounds", "confidence", "metadata"):
            payload[name] = getattr(self, name)
        return payload
@dataclass
class SpatialAnalyzerConfig:
    """Tunable thresholds used by the spatial analyzer."""

    # --- Distance thresholds (pixels) ---
    # Maximum edge-to-edge gap for two elements to count as "adjacent".
    adjacent_threshold: float = 20.0
    # Tolerance applied on every side when testing "inside".
    inside_margin: float = 5.0

    # --- Confidence thresholds (0-1) ---
    min_relation_confidence: float = 0.5
    min_container_confidence: float = 0.6

    # --- Grouping ---
    # Maximum edge-to-edge gap for two elements to join the same group.
    max_group_distance: float = 50.0
    # Minimum number of elements for a group to be considered.
    min_group_size: int = 2
# =============================================================================
# Analyseur Spatial
# =============================================================================
class SpatialAnalyzer:
"""
Analyseur de relations spatiales entre éléments UI.
Fonctionnalités:
- Calcul des relations spatiales (above, below, etc.)
- Détection de conteneurs sémantiques
- Groupement d'éléments liés
Example:
>>> analyzer = SpatialAnalyzer()
>>> relations = analyzer.compute_relations(elements)
>>> containers = analyzer.detect_containers(elements)
"""
def __init__(self, config: Optional[SpatialAnalyzerConfig] = None):
"""
Initialiser l'analyseur.
Args:
config: Configuration (utilise défaut si None)
"""
self.config = config or SpatialAnalyzerConfig()
logger.info("SpatialAnalyzer initialisé")
def compute_relations(
self,
elements: List[Any]
) -> List[SpatialRelation]:
"""
Calculer les relations spatiales entre tous les éléments.
Args:
elements: Liste d'éléments UI avec bounds
Returns:
Liste de SpatialRelation
"""
relations = []
for i, elem_a in enumerate(elements):
for j, elem_b in enumerate(elements):
if i >= j: # Éviter doublons et auto-relations
continue
# Calculer relation
relation = self._compute_relation(elem_a, elem_b)
if relation and relation.confidence >= self.config.min_relation_confidence:
relations.append(relation)
# Ajouter relation inverse pour symétrie
relations.append(relation.inverse)
logger.debug(f"Calculé {len(relations)} relations spatiales")
return relations
def _compute_relation(
self,
elem_a: Any,
elem_b: Any
) -> Optional[SpatialRelation]:
"""Calculer la relation entre deux éléments."""
# Extraire bounds
bounds_a = self._get_bounds(elem_a)
bounds_b = self._get_bounds(elem_b)
if bounds_a is None or bounds_b is None:
return None
# Calculer centres
center_a = self._get_center(bounds_a)
center_b = self._get_center(bounds_b)
# Calculer distance
distance = np.sqrt(
(center_a[0] - center_b[0])**2 +
(center_a[1] - center_b[1])**2
)
# Déterminer type de relation
relation_type, confidence = self._determine_relation_type(
bounds_a, bounds_b, center_a, center_b
)
if relation_type is None:
return None
elem_id_a = self._get_element_id(elem_a)
elem_id_b = self._get_element_id(elem_b)
return SpatialRelation(
source_element_id=elem_id_a,
target_element_id=elem_id_b,
relation_type=relation_type,
distance=distance,
confidence=confidence
)
def _determine_relation_type(
self,
bounds_a: Tuple[int, int, int, int],
bounds_b: Tuple[int, int, int, int],
center_a: Tuple[float, float],
center_b: Tuple[float, float]
) -> Tuple[Optional[RelationType], float]:
"""Déterminer le type de relation et sa confiance."""
x_a, y_a, w_a, h_a = bounds_a
x_b, y_b, w_b, h_b = bounds_b
# Vérifier INSIDE/CONTAINS
if self._is_inside(bounds_a, bounds_b):
return RelationType.INSIDE, 0.9
if self._is_inside(bounds_b, bounds_a):
return RelationType.CONTAINS, 0.9
# Vérifier OVERLAPS
if self._overlaps(bounds_a, bounds_b):
return RelationType.OVERLAPS, 0.7
# Calculer différences de position
dx = center_b[0] - center_a[0]
dy = center_b[1] - center_a[1]
# Déterminer direction principale
if abs(dx) > abs(dy):
# Relation horizontale
if dx > 0:
relation = RelationType.LEFT_OF # A est à gauche de B
else:
relation = RelationType.RIGHT_OF # A est à droite de B
confidence = min(1.0, abs(dx) / (abs(dy) + 1))
else:
# Relation verticale
if dy > 0:
relation = RelationType.ABOVE # A est au-dessus de B
else:
relation = RelationType.BELOW # A est en-dessous de B
confidence = min(1.0, abs(dy) / (abs(dx) + 1))
# Vérifier adjacence
gap = self._compute_gap(bounds_a, bounds_b)
if gap <= self.config.adjacent_threshold:
confidence = min(confidence + 0.2, 1.0)
return relation, confidence
def _is_inside(
self,
inner: Tuple[int, int, int, int],
outer: Tuple[int, int, int, int]
) -> bool:
"""Vérifier si inner est à l'intérieur de outer."""
x_i, y_i, w_i, h_i = inner
x_o, y_o, w_o, h_o = outer
margin = self.config.inside_margin
return (
x_i >= x_o - margin and
y_i >= y_o - margin and
x_i + w_i <= x_o + w_o + margin and
y_i + h_i <= y_o + h_o + margin
)
def _overlaps(
self,
bounds_a: Tuple[int, int, int, int],
bounds_b: Tuple[int, int, int, int]
) -> bool:
"""Vérifier si deux bounds se chevauchent."""
x_a, y_a, w_a, h_a = bounds_a
x_b, y_b, w_b, h_b = bounds_b
return not (
x_a + w_a < x_b or
x_b + w_b < x_a or
y_a + h_a < y_b or
y_b + h_b < y_a
)
def _compute_gap(
self,
bounds_a: Tuple[int, int, int, int],
bounds_b: Tuple[int, int, int, int]
) -> float:
"""Calculer l'écart entre deux bounds."""
x_a, y_a, w_a, h_a = bounds_a
x_b, y_b, w_b, h_b = bounds_b
# Écart horizontal
if x_a + w_a < x_b:
gap_x = x_b - (x_a + w_a)
elif x_b + w_b < x_a:
gap_x = x_a - (x_b + w_b)
else:
gap_x = 0
# Écart vertical
if y_a + h_a < y_b:
gap_y = y_b - (y_a + h_a)
elif y_b + h_b < y_a:
gap_y = y_a - (y_b + h_b)
else:
gap_y = 0
return np.sqrt(gap_x**2 + gap_y**2)
def detect_containers(
self,
elements: List[Any]
) -> List[SemanticContainer]:
"""
Détecter les conteneurs sémantiques.
Identifie les groupes d'éléments formant:
- Formulaires (labels + inputs)
- Menus (items alignés)
- Barres d'outils (boutons alignés)
- Dialogues (titre + contenu + boutons)
Args:
elements: Liste d'éléments UI
Returns:
Liste de SemanticContainer
"""
containers = []
# Grouper éléments par proximité
groups = self._group_by_proximity(elements)
for group_id, group_elements in enumerate(groups):
if len(group_elements) < self.config.min_group_size:
continue
# Analyser le groupe pour déterminer le type
container_type, confidence = self._classify_container(group_elements)
if confidence < self.config.min_container_confidence:
continue
# Calculer bounds du conteneur
bounds = self._compute_group_bounds(group_elements)
container = SemanticContainer(
container_id=f"container_{group_id:03d}",
container_type=container_type,
element_ids=[self._get_element_id(e) for e in group_elements],
bounds=bounds,
confidence=confidence
)
containers.append(container)
logger.info(f"Détecté {len(containers)} conteneurs sémantiques")
return containers
def _group_by_proximity(
self,
elements: List[Any]
) -> List[List[Any]]:
"""Grouper les éléments par proximité spatiale."""
if not elements:
return []
# Union-Find pour groupement
n = len(elements)
parent = list(range(n))
def find(x):
if parent[x] != x:
parent[x] = find(parent[x])
return parent[x]
def union(x, y):
px, py = find(x), find(y)
if px != py:
parent[px] = py
# Grouper éléments proches
for i in range(n):
for j in range(i + 1, n):
bounds_i = self._get_bounds(elements[i])
bounds_j = self._get_bounds(elements[j])
if bounds_i and bounds_j:
gap = self._compute_gap(bounds_i, bounds_j)
if gap <= self.config.max_group_distance:
union(i, j)
# Construire groupes
groups_dict: Dict[int, List[Any]] = {}
for i, elem in enumerate(elements):
root = find(i)
if root not in groups_dict:
groups_dict[root] = []
groups_dict[root].append(elem)
return list(groups_dict.values())
def _classify_container(
self,
elements: List[Any]
) -> Tuple[ContainerType, float]:
"""Classifier le type de conteneur."""
# Analyser les types d'éléments
roles = [self._get_role(e) for e in elements]
# Compter types
has_input = any(r in ['textbox', 'input', 'textarea', 'combobox'] for r in roles)
has_label = any(r in ['label', 'text'] for r in roles)
has_button = any(r in ['button', 'link'] for r in roles)
has_menuitem = any(r in ['menuitem', 'option'] for r in roles)
# Analyser alignement
bounds_list = [self._get_bounds(e) for e in elements if self._get_bounds(e)]
is_vertical = self._is_vertically_aligned(bounds_list)
is_horizontal = self._is_horizontally_aligned(bounds_list)
# Classifier
if has_input and has_label:
return ContainerType.FORM, 0.8
if has_menuitem or (is_vertical and has_button):
return ContainerType.MENU, 0.7
if is_horizontal and has_button:
return ContainerType.TOOLBAR, 0.7
if has_button and len(elements) <= 5:
return ContainerType.DIALOG, 0.6
if is_vertical and len(elements) > 3:
return ContainerType.LIST, 0.6
return ContainerType.PANEL, 0.5
def _is_vertically_aligned(
self,
bounds_list: List[Tuple[int, int, int, int]]
) -> bool:
"""Vérifier si les éléments sont alignés verticalement."""
if len(bounds_list) < 2:
return False
x_centers = [(b[0] + b[2]/2) for b in bounds_list]
x_std = np.std(x_centers)
return x_std < 30 # Tolérance de 30 pixels
def _is_horizontally_aligned(
self,
bounds_list: List[Tuple[int, int, int, int]]
) -> bool:
"""Vérifier si les éléments sont alignés horizontalement."""
if len(bounds_list) < 2:
return False
y_centers = [(b[1] + b[3]/2) for b in bounds_list]
y_std = np.std(y_centers)
return y_std < 30 # Tolérance de 30 pixels
def _compute_group_bounds(
self,
elements: List[Any]
) -> Tuple[int, int, int, int]:
"""Calculer les bounds englobant un groupe."""
bounds_list = [self._get_bounds(e) for e in elements if self._get_bounds(e)]
if not bounds_list:
return (0, 0, 0, 0)
min_x = min(b[0] for b in bounds_list)
min_y = min(b[1] for b in bounds_list)
max_x = max(b[0] + b[2] for b in bounds_list)
max_y = max(b[1] + b[3] for b in bounds_list)
return (min_x, min_y, max_x - min_x, max_y - min_y)
def find_by_relation(
self,
anchor_id: str,
relation: RelationType,
relations: List[SpatialRelation]
) -> List[str]:
"""
Trouver les éléments ayant une relation spécifique avec un ancre.
Args:
anchor_id: ID de l'élément ancre
relation: Type de relation recherchée
relations: Liste des relations calculées
Returns:
Liste des IDs d'éléments correspondants
"""
results = []
for rel in relations:
if rel.source_element_id == anchor_id and rel.relation_type == relation:
results.append(rel.target_element_id)
return results
def _get_bounds(self, element: Any) -> Optional[Tuple[int, int, int, int]]:
"""Extraire les bounds d'un élément."""
if hasattr(element, 'bounds'):
return element.bounds
if hasattr(element, 'bbox'):
return element.bbox
if isinstance(element, dict):
if 'bounds' in element:
return tuple(element['bounds'])
if 'bbox' in element:
return tuple(element['bbox'])
if all(k in element for k in ['x', 'y', 'width', 'height']):
return (element['x'], element['y'], element['width'], element['height'])
return None
def _get_center(self, bounds: Tuple[int, int, int, int]) -> Tuple[float, float]:
"""Calculer le centre d'un bounds."""
x, y, w, h = bounds
return (x + w/2, y + h/2)
def _get_element_id(self, element: Any) -> str:
"""Extraire l'ID d'un élément."""
if hasattr(element, 'element_id'):
return element.element_id
if hasattr(element, 'id'):
return element.id
if isinstance(element, dict):
return element.get('id', element.get('element_id', str(id(element))))
return str(id(element))
def _get_role(self, element: Any) -> str:
"""Extraire le rôle d'un élément."""
if hasattr(element, 'role'):
return element.role.lower()
if isinstance(element, dict):
return element.get('role', 'unknown').lower()
return 'unknown'
def get_config(self) -> SpatialAnalyzerConfig:
"""Récupérer la configuration."""
return self.config
# =============================================================================
# Fonctions utilitaires
# =============================================================================
def create_spatial_analyzer(
    adjacent_threshold: float = 20.0,
    max_group_distance: float = 50.0
) -> SpatialAnalyzer:
    """
    Build an analyzer with a customized configuration.

    Only the two distance thresholds are overridden; every other setting
    keeps the SpatialAnalyzerConfig default.

    Args:
        adjacent_threshold: Maximum distance for "adjacent"
        max_group_distance: Maximum distance for grouping

    Returns:
        Configured SpatialAnalyzer
    """
    return SpatialAnalyzer(
        SpatialAnalyzerConfig(
            adjacent_threshold=adjacent_threshold,
            max_group_distance=max_group_distance,
        )
    )