feat(p1): persist workflows and semantic learning artifacts
This commit is contained in:
@@ -687,6 +687,7 @@ def _extract_required_apps_from_events(
|
||||
- launch_result_target: dict optionnel (vrai clic SearchHost -> app)
|
||||
"""
|
||||
app_counts: Dict[str, int] = defaultdict(int)
|
||||
app_titles: Dict[str, List[str]] = defaultdict(list)
|
||||
first_app = None
|
||||
first_window_title = None
|
||||
|
||||
@@ -702,6 +703,8 @@ def _extract_required_apps_from_events(
|
||||
title = to_info.get("title", "")
|
||||
if app_name:
|
||||
app_counts[app_name] += 1
|
||||
if title:
|
||||
app_titles[app_name].append(title)
|
||||
if first_app is None and app_name.lower() not in _SETUP_IGNORE_APPS:
|
||||
first_app = app_name
|
||||
first_window_title = title
|
||||
@@ -741,6 +744,10 @@ def _extract_required_apps_from_events(
|
||||
"primary_launch_cmd": primary_launch_cmd,
|
||||
"first_window_title": first_window_title or "",
|
||||
"apps": dict(app_counts),
|
||||
"has_neutral_window_title": any(
|
||||
_is_neutral_window_title(title)
|
||||
for title in app_titles.get(primary_app, [])
|
||||
),
|
||||
}
|
||||
if start_menu_target:
|
||||
result["start_menu_target"] = start_menu_target
|
||||
@@ -927,6 +934,9 @@ def _extract_required_apps_from_workflow(workflow) -> Dict[str, Any]:
|
||||
"primary_launch_cmd": primary_launch_cmd,
|
||||
"first_window_title": first_title,
|
||||
"apps": {},
|
||||
"has_neutral_window_title": any(
|
||||
_is_neutral_window_title(title) for title in window_titles
|
||||
),
|
||||
"source_session_id": source_session_id,
|
||||
"machine_id": machine_id,
|
||||
}
|
||||
@@ -1113,6 +1123,50 @@ def _generate_run_dialog_setup_actions(
|
||||
},
|
||||
]
|
||||
|
||||
needs_fresh_notepad_document = (
|
||||
primary_app.lower() == "notepad.exe"
|
||||
and (
|
||||
bool(app_info.get("has_neutral_window_title"))
|
||||
or _is_neutral_window_title(first_title)
|
||||
)
|
||||
)
|
||||
if needs_fresh_notepad_document:
|
||||
if title_patterns or first_title:
|
||||
actions.append({
|
||||
"action_id": f"act_{setup_id_prefix}_verify_before_fresh_document",
|
||||
"type": "verify_screen",
|
||||
"expected_node": "setup_initial_before_fresh_document",
|
||||
"timeout_ms": 5000,
|
||||
"_setup_phase": True,
|
||||
"_setup_step": "verify_app_ready_before_fresh_document",
|
||||
"_setup_strategy": "run_dialog",
|
||||
"expected_window_title_contains": title_patterns or [first_title],
|
||||
"intention": (
|
||||
"vérifier que Bloc-notes est la scène active avant "
|
||||
"d'ouvrir un document vierge"
|
||||
),
|
||||
})
|
||||
actions.extend([
|
||||
{
|
||||
"action_id": f"act_{setup_id_prefix}_ensure_fresh_document",
|
||||
"type": "key_combo",
|
||||
"keys": ["ctrl", "n"],
|
||||
"_setup_phase": True,
|
||||
"_setup_step": "ensure_fresh_document",
|
||||
"_setup_strategy": "run_dialog",
|
||||
"expected_window_before": first_title,
|
||||
"intention": "ouvrir un document Bloc-notes vierge non nommé",
|
||||
},
|
||||
{
|
||||
"action_id": f"act_{setup_id_prefix}_wait_fresh_document",
|
||||
"type": "wait",
|
||||
"duration_ms": 400,
|
||||
"_setup_phase": True,
|
||||
"_setup_step": "wait_fresh_document",
|
||||
"_setup_strategy": "run_dialog",
|
||||
},
|
||||
])
|
||||
|
||||
if title_patterns or first_title:
|
||||
actions.append({
|
||||
"action_id": f"act_{setup_id_prefix}_verify",
|
||||
@@ -1688,6 +1742,63 @@ def _is_learned_workflow(workflow) -> bool:
|
||||
return has_prototype
|
||||
|
||||
|
||||
_TARGET_SEMANTIC_KEYS = (
|
||||
"by_text",
|
||||
"by_role",
|
||||
"anchor_id",
|
||||
"target_text",
|
||||
"ocr_description",
|
||||
"description",
|
||||
"vlm_description",
|
||||
"anchor_image_base64",
|
||||
"by_text_source",
|
||||
"window_title",
|
||||
"anchor_bbox",
|
||||
"original_size",
|
||||
)
|
||||
|
||||
|
||||
def _first_non_empty_text(*values: Any) -> str:
|
||||
for value in values:
|
||||
text = str(value or "").strip()
|
||||
if text and text.casefold() not in {"none", "null"}:
|
||||
return text
|
||||
return ""
|
||||
|
||||
|
||||
def _target_attr(target: Any, key: str, default: Any = None) -> Any:
|
||||
if isinstance(target, dict):
|
||||
return target.get(key, default)
|
||||
return getattr(target, key, default)
|
||||
|
||||
|
||||
def _copy_semantic_target_fields(
    target_spec: Dict[str, Any],
    *sources: Optional[Dict[str, Any]],
) -> None:
    """Fill ``target_spec`` in place with semantic fields from ``sources``.

    For every key in ``_TARGET_SEMANTIC_KEYS``, the first truthy value found
    across ``sources`` (in argument order) is copied, unless ``target_spec``
    already holds a truthy value for that key. Sources that are not dicts
    are ignored.

    After copying, two fallbacks are applied:
      * ``by_text`` is derived from ``target_text`` when missing, and
        ``by_text_source`` defaults to ``"visual_anchor"`` in that case.
      * ``vlm_description`` is derived from ``description`` or, failing
        that, ``ocr_description`` when missing.

    Args:
        target_spec: Dict mutated in place.
        *sources: Candidate dicts, in decreasing priority order.
    """
    for candidate in sources:
        if isinstance(candidate, dict):
            for field in _TARGET_SEMANTIC_KEYS:
                incoming = candidate.get(field)
                # Skip empty values and never overwrite an existing one.
                if not incoming or target_spec.get(field):
                    continue
                target_spec[field] = incoming

    if not target_spec.get("by_text"):
        fallback_text = _first_non_empty_text(target_spec.get("target_text"))
        if fallback_text:
            target_spec["by_text"] = fallback_text
            target_spec.setdefault("by_text_source", "visual_anchor")

    if target_spec.get("vlm_description"):
        return

    fallback_description = _first_non_empty_text(
        target_spec.get("description"),
        target_spec.get("ocr_description"),
    )
    if fallback_description:
        target_spec["vlm_description"] = fallback_description
|
||||
|
||||
|
||||
def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Convertir un WorkflowEdge en liste d'actions normalisées pour l'Agent V1.
|
||||
@@ -1705,8 +1816,9 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
||||
# Extraire les coordonnées normalisées depuis TargetSpec.by_position
|
||||
x_pct = 0.0
|
||||
y_pct = 0.0
|
||||
if target and target.by_position:
|
||||
px, py = target.by_position
|
||||
by_position = _target_attr(target, "by_position")
|
||||
if target and by_position:
|
||||
px, py = by_position
|
||||
if px <= 1.0 and py <= 1.0:
|
||||
x_pct = px
|
||||
y_pct = py
|
||||
@@ -1769,10 +1881,15 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
||||
elif action_type == "extract_table":
|
||||
normalized["type"] = "extract_table"
|
||||
normalized["parameters"] = {
|
||||
"output_var": action_params.get("output_var", "table_rows"),
|
||||
"output_var": (
|
||||
action_params.get("variable_name")
|
||||
or action_params.get("output_var")
|
||||
or "table_rows"
|
||||
),
|
||||
"pattern": action_params.get("pattern"),
|
||||
"limit": action_params.get("limit"),
|
||||
"region": action_params.get("region"),
|
||||
"engine": action_params.get("engine", "easyocr"),
|
||||
}
|
||||
return [normalized]
|
||||
|
||||
@@ -1833,14 +1950,33 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
||||
|
||||
# Ajouter le target_spec complet pour la résolution visuelle
|
||||
target_spec = {}
|
||||
if target and target.by_role:
|
||||
target_spec["by_role"] = target.by_role
|
||||
normalized["target_role"] = target.by_role # Compat debug
|
||||
if target and target.by_text:
|
||||
target_spec["by_text"] = target.by_text
|
||||
normalized["target_text"] = target.by_text # Compat debug
|
||||
if target and hasattr(target, 'context_hints') and target.context_hints:
|
||||
target_spec["context_hints"] = target.context_hints
|
||||
by_role = _target_attr(target, "by_role", "")
|
||||
by_text = _target_attr(target, "by_text", "")
|
||||
context_hints = _target_attr(target, "context_hints", {}) or {}
|
||||
if target and by_role:
|
||||
target_spec["by_role"] = by_role
|
||||
normalized["target_role"] = by_role # Compat debug
|
||||
if target and by_text:
|
||||
target_spec["by_text"] = by_text
|
||||
normalized["target_text"] = by_text # Compat debug
|
||||
if target and context_hints:
|
||||
target_spec["context_hints"] = context_hints
|
||||
_copy_semantic_target_fields(
|
||||
target_spec,
|
||||
action_params,
|
||||
action_params.get("target_spec") if isinstance(action_params, dict) else None,
|
||||
context_hints,
|
||||
)
|
||||
semantic_label = _first_non_empty_text(
|
||||
target_spec.get("by_text"),
|
||||
target_spec.get("target_text"),
|
||||
target_spec.get("description"),
|
||||
target_spec.get("ocr_description"),
|
||||
target_spec.get("vlm_description"),
|
||||
)
|
||||
if semantic_label:
|
||||
normalized.setdefault("target_text", target_spec.get("target_text") or semantic_label)
|
||||
normalized.setdefault("target_description", semantic_label)
|
||||
if target_spec:
|
||||
normalized["target_spec"] = target_spec
|
||||
normalized["visual_mode"] = True # Signal à l'agent d'utiliser la résolution visuelle
|
||||
@@ -2004,6 +2140,7 @@ def _handle_extract_table_action(
|
||||
output_var : nom de variable runtime (default "table_rows")
|
||||
pattern : regex à matcher sur chaque token OCR (ex : r"^25\\d{6}$")
|
||||
limit : nb max d'entrées à retourner
|
||||
engine : easyocr (défaut) ou tesseract/digits/ipp pour chiffres
|
||||
region : (x, y, w, h) en pixels pour cropper avant OCR
|
||||
(None = image entière)
|
||||
|
||||
@@ -2014,6 +2151,7 @@ def _handle_extract_table_action(
|
||||
output_var = (params.get("output_var") or params.get("variable_name") or "table_rows").strip()
|
||||
pattern = params.get("pattern") or None
|
||||
limit = params.get("limit")
|
||||
engine = params.get("engine") or "easyocr"
|
||||
region = params.get("region") or None
|
||||
if isinstance(limit, str):
|
||||
try:
|
||||
@@ -2058,6 +2196,7 @@ def _handle_extract_table_action(
|
||||
region=tuple(region) if region else None,
|
||||
pattern=pattern,
|
||||
limit=limit,
|
||||
engine=engine,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
@@ -2071,8 +2210,8 @@ def _handle_extract_table_action(
|
||||
|
||||
replay_state.setdefault("variables", {})[output_var] = rows
|
||||
logger.info(
|
||||
"extract_table → variable '%s' (%d entrées, pattern=%r, limit=%s) replay %s",
|
||||
output_var, len(rows), pattern, limit, replay_state.get("replay_id", "?"),
|
||||
"extract_table → variable '%s' (%d entrées, pattern=%r, limit=%s, engine=%s) replay %s",
|
||||
output_var, len(rows), pattern, limit, engine, replay_state.get("replay_id", "?"),
|
||||
)
|
||||
return bool(rows)
|
||||
|
||||
@@ -2410,6 +2549,29 @@ def _expand_compound_steps(
|
||||
action["x_pct"] = step.get("x_pct", 0.0)
|
||||
action["y_pct"] = step.get("y_pct", 0.0)
|
||||
action["button"] = step.get("button", "left")
|
||||
target_spec: Dict[str, Any] = {}
|
||||
_copy_semantic_target_fields(
|
||||
target_spec,
|
||||
step,
|
||||
step.get("target_spec") if isinstance(step, dict) else None,
|
||||
step.get("visual_anchor") if isinstance(step, dict) else None,
|
||||
)
|
||||
semantic_label = _first_non_empty_text(
|
||||
target_spec.get("by_text"),
|
||||
target_spec.get("target_text"),
|
||||
target_spec.get("description"),
|
||||
target_spec.get("ocr_description"),
|
||||
target_spec.get("vlm_description"),
|
||||
)
|
||||
if semantic_label:
|
||||
action.setdefault(
|
||||
"target_text",
|
||||
target_spec.get("target_text") or semantic_label,
|
||||
)
|
||||
action.setdefault("target_description", semantic_label)
|
||||
if target_spec:
|
||||
action["target_spec"] = target_spec
|
||||
action["visual_mode"] = True
|
||||
|
||||
else:
|
||||
logger.debug(f"Step compound inconnu : {step_type}")
|
||||
@@ -2659,6 +2821,8 @@ def _create_replay_state(
|
||||
a_copy = {
|
||||
"action_id": a.get("action_id"),
|
||||
"type": a.get("type"),
|
||||
"keys": a.get("keys"),
|
||||
"button": a.get("button"),
|
||||
"x_pct": a.get("x_pct"),
|
||||
"y_pct": a.get("y_pct"),
|
||||
# Contrôle strict des étapes (Dom, matin 10 avril 2026)
|
||||
@@ -2667,6 +2831,9 @@ def _create_replay_state(
|
||||
"expected_window_title": a.get("expected_window_title", ""),
|
||||
# Contexte métier utile pour logs et apprentissage
|
||||
"intention": a.get("intention", ""),
|
||||
"target_text": a.get("target_text", ""),
|
||||
"target_description": a.get("target_description", ""),
|
||||
"description": a.get("description", ""),
|
||||
}
|
||||
ts = a.get("target_spec")
|
||||
if isinstance(ts, dict):
|
||||
|
||||
Reference in New Issue
Block a user