feat: import Excel → SQLite + boucle données → UI dans le VWB
- ExcelImporter : import .xlsx → SQLite auto (détection types, batch insert)
- DBIterator : lecture ligne par ligne avec filtre/tri/limite
- VWB actions : "Importer Excel" + "Pour chaque ligne" dans la palette
- DAG executor : pré-exécution import, boucle foreach avec injection
${current_row.colonne} dans les étapes dépendantes
- 36 tests unitaires Excel/DB (tous passent)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ Auteur : Dom, Claude — 16 mars 2026
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
@@ -37,6 +38,15 @@ from core.execution.dag_executor import (
|
||||
)
|
||||
from core.execution.llm_actions import LLMActionHandler
|
||||
|
||||
# Data loop — import Excel et itération sur tables
|
||||
try:
|
||||
from core.data.excel_importer import ExcelImporter
|
||||
from core.data.db_iterator import DBIterator
|
||||
_DATA_LOOP_AVAILABLE = True
|
||||
except ImportError as _e:
|
||||
logger.warning("Module core.data indisponible : %s", _e)
|
||||
_DATA_LOOP_AVAILABLE = False
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Types d'actions VWB → StepType du DAGExecutor
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -63,6 +73,9 @@ _WAIT_ACTION_TYPES = {"wait_for_anchor"}
|
||||
# Actions VWB de type condition
|
||||
_CONDITION_ACTION_TYPES = {"visual_condition"}
|
||||
|
||||
# Actions VWB de type data loop
|
||||
_DATA_LOOP_ACTION_TYPES = {"import_excel", "db_foreach"}
|
||||
|
||||
|
||||
def _classify_step_type(action_type: str) -> StepType:
|
||||
"""Détermine le StepType DAG à partir du action_type VWB."""
|
||||
@@ -72,6 +85,7 @@ def _classify_step_type(action_type: str) -> StepType:
|
||||
return StepType.WAIT
|
||||
if action_type in _CONDITION_ACTION_TYPES:
|
||||
return StepType.CONDITION
|
||||
# import_excel et db_foreach sont des UI_ACTION traitées spécialement
|
||||
return StepType.UI_ACTION
|
||||
|
||||
|
||||
@@ -157,6 +171,270 @@ def _convert_vwb_to_dag_steps(
|
||||
return dag_steps
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exécution des étapes Data Loop (import_excel, db_foreach)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CURRENT_ROW_PATTERN = re.compile(r"\$\{current_row\.(\w+)\}")
|
||||
|
||||
|
||||
def _execute_import_excel(parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Exécute l'import d'un fichier Excel dans la base SQLite.
|
||||
|
||||
Args:
|
||||
parameters: Doit contenir 'file_path', optionnellement 'table_name', 'sheet_name'
|
||||
|
||||
Returns:
|
||||
Dict avec les infos d'import (table_name, row_count, columns)
|
||||
"""
|
||||
if not _DATA_LOOP_AVAILABLE:
|
||||
raise RuntimeError(
|
||||
"Module core.data non disponible. "
|
||||
"Vérifiez que core/data/excel_importer.py existe."
|
||||
)
|
||||
|
||||
file_path = parameters.get("file_path", "")
|
||||
if not file_path:
|
||||
raise ValueError("Paramètre 'file_path' requis pour import_excel")
|
||||
|
||||
table_name = parameters.get("table_name") or None
|
||||
sheet_name = parameters.get("sheet_name") or None
|
||||
|
||||
# Convertir sheet_name en int si c'est un index numérique
|
||||
if sheet_name and sheet_name.isdigit():
|
||||
sheet_name = int(sheet_name)
|
||||
|
||||
importer = ExcelImporter()
|
||||
result = importer.import_file(
|
||||
file_path=file_path,
|
||||
table_name=table_name,
|
||||
sheet_name=sheet_name,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Import Excel terminé : %s → table '%s' (%d lignes, colonnes: %s)",
|
||||
file_path,
|
||||
result.table_name,
|
||||
result.row_count,
|
||||
result.columns,
|
||||
)
|
||||
|
||||
return {
|
||||
"table_name": result.table_name,
|
||||
"row_count": result.row_count,
|
||||
"columns": result.columns,
|
||||
"db_path": str(result.db_path) if hasattr(result, "db_path") else None,
|
||||
}
|
||||
|
||||
|
||||
def _inject_current_row(
|
||||
action: Dict[str, Any], row: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""Remplace les références ${current_row.column} dans les paramètres d'une action.
|
||||
|
||||
Args:
|
||||
action: Dict des paramètres de l'étape
|
||||
row: Dict représentant la ligne courante (colonne → valeur)
|
||||
|
||||
Returns:
|
||||
Copie de l'action avec les références remplacées
|
||||
"""
|
||||
import copy
|
||||
resolved = copy.deepcopy(action)
|
||||
|
||||
def _resolve(obj: Any) -> Any:
|
||||
if isinstance(obj, str):
|
||||
# Cas spécial : la chaîne entière est une seule référence
|
||||
m = _CURRENT_ROW_PATTERN.fullmatch(obj)
|
||||
if m:
|
||||
col = m.group(1)
|
||||
return row.get(col, obj)
|
||||
# Cas général : remplacement inline
|
||||
def _replacer(match: re.Match) -> str:
|
||||
col = match.group(1)
|
||||
val = row.get(col)
|
||||
return str(val) if val is not None else match.group(0)
|
||||
return _CURRENT_ROW_PATTERN.sub(_replacer, obj)
|
||||
elif isinstance(obj, dict):
|
||||
return {k: _resolve(v) for k, v in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
return [_resolve(item) for item in obj]
|
||||
return obj
|
||||
|
||||
return _resolve(resolved)
|
||||
|
||||
|
||||
def _execute_db_foreach(
    foreach_step_data: Dict[str, Any],
    all_steps_data: List[Dict[str, Any]],
    edges_data: List[Dict[str, Any]],
    executor_kwargs: Dict[str, Any],
) -> Dict[str, Any]:
    """Run a db_foreach loop: read rows, then re-execute the dependent steps per row.

    Algorithm:
        1. Read every row of the table (with optional filters).
        2. Identify the steps depending on the foreach (via the edges).
        3. For each row, inject ${current_row.column} and execute those steps.

    Args:
        foreach_step_data: Data of the db_foreach step.
        all_steps_data: All the workflow steps.
        edges_data: The workflow edges.
        executor_kwargs: DAGExecutor parameters (model, ollama_endpoint, timeout).

    Returns:
        Dict summarising the loop execution (row_count, success_count,
        error_count, per-iteration results).

    Raises:
        RuntimeError: If the core.data module is not available.
        ValueError: If the 'table_name' parameter is missing.
    """
    if not _DATA_LOOP_AVAILABLE:
        raise RuntimeError(
            "Module core.data non disponible. "
            "Vérifiez que core/data/db_iterator.py existe."
        )

    parameters = foreach_step_data.get("parameters", {})
    table_name = parameters.get("table_name", "")
    if not table_name:
        raise ValueError("Paramètre 'table_name' requis pour db_foreach")

    where_clause = parameters.get("where_clause") or None
    order_by = parameters.get("order_by") or None
    limit = parameters.get("limit")
    if limit is not None:
        try:
            limit = int(limit)
        except (ValueError, TypeError):
            # Non-numeric limit: ignore it rather than abort the whole loop.
            limit = None

    # 1. Read the rows
    iterator = DBIterator()
    rows = list(iterator.iterate(
        table_name=table_name,
        where=where_clause,
        order_by=order_by,
        limit=limit,
    ))

    if not rows:
        logger.info("db_foreach : table '%s' vide, aucune itération", table_name)
        return {"row_count": 0, "iterations": [], "table_name": table_name}

    logger.info(
        "db_foreach : %d lignes à traiter dans '%s'", len(rows), table_name
    )

    # 2. Identify the steps depending on the foreach: direct targets first,
    # then the transitive closure along the edges (fixed-point loop).
    foreach_id = foreach_step_data["id"]
    dependent_ids = set()
    for edge in edges_data:
        if edge.get("source") == foreach_id:
            dependent_ids.add(edge["target"])

    changed = True
    while changed:
        changed = False
        for edge in edges_data:
            if edge.get("source") in dependent_ids and edge["target"] not in dependent_ids:
                dependent_ids.add(edge["target"])
                changed = True

    if not dependent_ids:
        logger.warning("db_foreach '%s' : aucune étape dépendante trouvée", foreach_id)
        return {"row_count": len(rows), "iterations": [], "table_name": table_name}

    # Keep only the sub-graph: the dependent steps and the edges strictly
    # between them (edges touching the foreach itself are excluded here).
    # Both are loop-invariant, so they are computed once before iterating.
    sub_steps = [s for s in all_steps_data if s["id"] in dependent_ids]
    sub_edges = [
        e for e in edges_data
        if e.get("source") in dependent_ids and e.get("target") in dependent_ids
    ]

    # 3. Inject and execute once per row
    iteration_results = []
    model = executor_kwargs.get("model", "qwen3-vl:8b")
    ollama_endpoint = executor_kwargs.get("ollama_endpoint", "http://localhost:11434")
    timeout = executor_kwargs.get("timeout", 300)

    for row_idx, row in enumerate(rows):
        logger.info(
            "db_foreach iteration %d/%d : %s",
            row_idx + 1, len(rows), {k: str(v)[:50] for k, v in row.items()}
        )

        # Inject ${current_row.xxx} into each step's parameters
        injected_steps = []
        for step_data in sub_steps:
            injected = dict(step_data)
            injected["parameters"] = _inject_current_row(
                step_data.get("parameters", {}), row
            )
            injected_steps.append(injected)

        # Build the sub-DAG for this row
        dag_steps = _convert_vwb_to_dag_steps(injected_steps, sub_edges)

        # Drop the dependency on the foreach step: its former direct
        # dependents become the roots of the sub-DAG.
        for ds in dag_steps:
            ds.depends_on = [d for d in ds.depends_on if d != foreach_id]

        # Only instantiate an LLM handler when the sub-DAG needs one
        has_llm = any(s.step_type == StepType.LLM_CALL for s in dag_steps)

        llm_handler = None
        if has_llm:
            llm_handler = LLMActionHandler(
                ollama_endpoint=ollama_endpoint,
                model=model,
            )

        executor = DAGExecutor(
            max_llm_workers=2,
            max_ui_workers=1,
            llm_handler=llm_handler,
        )
        executor.load_workflow(dag_steps)

        result = executor.execute(timeout=timeout)

        iteration_results.append({
            "row_index": row_idx,
            "row_data": row,
            "success": result.success,
            "results": result.results,
            "errors": result.errors,
            "duration": result.duration_seconds,
        })

        if not result.success:
            # 1-based index, consistent with the per-iteration info log above
            logger.warning(
                "db_foreach iteration %d échouée : %s",
                row_idx + 1, result.errors,
            )

    # Summary
    success_count = sum(1 for r in iteration_results if r["success"])
    return {
        "table_name": table_name,
        "row_count": len(rows),
        "success_count": success_count,
        "error_count": len(rows) - success_count,
        "iterations": iteration_results,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Instance globale du dernier exécuteur (pour le status polling)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -232,8 +510,107 @@ def execute_dag(workflow_id: str):
|
||||
model = data.get("model", "qwen3-vl:8b")
|
||||
ollama_endpoint = data.get("ollama_endpoint", "http://localhost:11434")
|
||||
|
||||
executor_kwargs = {
|
||||
"timeout": timeout,
|
||||
"model": model,
|
||||
"ollama_endpoint": ollama_endpoint,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Pré-traitement des étapes Data Loop (import_excel, db_foreach)
|
||||
# ---------------------------------------------------------------
|
||||
data_loop_results: Dict[str, Any] = {}
|
||||
|
||||
# 1. Exécuter les imports Excel en premier (avant le DAG principal)
|
||||
import_steps = [s for s in steps_data if s["action_type"] == "import_excel"]
|
||||
for imp_step in import_steps:
|
||||
try:
|
||||
imp_result = _execute_import_excel(imp_step.get("parameters", {}))
|
||||
data_loop_results[imp_step["id"]] = imp_result
|
||||
logger.info(
|
||||
"Import Excel '%s' terminé : %s",
|
||||
imp_step["id"], imp_result,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("Import Excel '%s' échoué : %s", imp_step["id"], exc)
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': f"Import Excel échoué : {exc}",
|
||||
}), 500
|
||||
|
||||
# 2. Traiter les étapes db_foreach séparément
|
||||
foreach_steps = [s for s in steps_data if s["action_type"] == "db_foreach"]
|
||||
for fe_step in foreach_steps:
|
||||
try:
|
||||
fe_result = _execute_db_foreach(
|
||||
fe_step, steps_data, edges_data, executor_kwargs,
|
||||
)
|
||||
data_loop_results[fe_step["id"]] = fe_result
|
||||
logger.info(
|
||||
"db_foreach '%s' terminé : %d lignes, %d succès",
|
||||
fe_step["id"],
|
||||
fe_result.get("row_count", 0),
|
||||
fe_result.get("success_count", 0),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("db_foreach '%s' échoué : %s", fe_step["id"], exc)
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': f"Boucle données échouée : {exc}",
|
||||
}), 500
|
||||
|
||||
# 3. Filtrer les étapes data loop + leurs dépendants (déjà exécutés)
|
||||
# pour le DAG principal
|
||||
foreach_ids = {s["id"] for s in foreach_steps}
|
||||
already_executed_ids = set()
|
||||
for fe_id in foreach_ids:
|
||||
already_executed_ids.add(fe_id)
|
||||
# Trouver récursivement tous les dépendants
|
||||
changed = True
|
||||
while changed:
|
||||
changed = False
|
||||
for edge in edges_data:
|
||||
if (edge.get("source") in already_executed_ids
|
||||
and edge["target"] not in already_executed_ids):
|
||||
already_executed_ids.add(edge["target"])
|
||||
changed = True
|
||||
|
||||
# Ajouter les import_excel aux déjà exécutés
|
||||
for imp_step in import_steps:
|
||||
already_executed_ids.add(imp_step["id"])
|
||||
|
||||
# Filtrer les étapes restantes pour le DAG principal
|
||||
remaining_steps = [s for s in steps_data if s["id"] not in already_executed_ids]
|
||||
remaining_edges = [
|
||||
e for e in edges_data
|
||||
if e.get("source") not in already_executed_ids
|
||||
and e.get("target") not in already_executed_ids
|
||||
]
|
||||
|
||||
# Si toutes les étapes sont des data loop, retourner directement
|
||||
if not remaining_steps:
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'execution': {
|
||||
"success": True,
|
||||
"data_loop_results": data_loop_results,
|
||||
"steps": {},
|
||||
"results": {},
|
||||
"errors": [],
|
||||
"duration_seconds": 0,
|
||||
},
|
||||
})
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Exécution DAG normale pour les étapes restantes
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
# Convertir en étapes DAG
|
||||
dag_steps = _convert_vwb_to_dag_steps(steps_data, edges_data)
|
||||
dag_steps = _convert_vwb_to_dag_steps(remaining_steps, remaining_edges)
|
||||
|
||||
# Supprimer les dépendances vers des étapes data loop déjà exécutées
|
||||
for ds in dag_steps:
|
||||
ds.depends_on = [d for d in ds.depends_on if d not in already_executed_ids]
|
||||
|
||||
# Vérifier s'il y a des étapes LLM
|
||||
has_llm_steps = any(s.step_type == StepType.LLM_CALL for s in dag_steps)
|
||||
@@ -253,6 +630,11 @@ def execute_dag(workflow_id: str):
|
||||
llm_handler=llm_handler,
|
||||
)
|
||||
|
||||
# Injecter les résultats des étapes data loop dans l'exécuteur
|
||||
# pour qu'ils soient disponibles via ${step_id.result}
|
||||
for step_id, dl_result in data_loop_results.items():
|
||||
executor._results[step_id] = dl_result
|
||||
|
||||
# Charger le workflow dans le DAG
|
||||
executor.load_workflow(dag_steps)
|
||||
|
||||
@@ -261,16 +643,22 @@ def execute_dag(workflow_id: str):
|
||||
_last_result = None
|
||||
|
||||
logger.info(
|
||||
"Lancement exécution DAG pour workflow '%s' : %d étapes (%d LLM)",
|
||||
"Lancement exécution DAG pour workflow '%s' : %d étapes (%d LLM, %d data loop pré-traités)",
|
||||
workflow_id,
|
||||
len(dag_steps),
|
||||
sum(1 for s in dag_steps if s.step_type == StepType.LLM_CALL),
|
||||
len(data_loop_results),
|
||||
)
|
||||
|
||||
# Exécuter (bloquant — le timeout protège)
|
||||
result = executor.execute(timeout=timeout)
|
||||
_last_result = result
|
||||
|
||||
# Fusionner les résultats data loop
|
||||
result_dict = result.to_dict()
|
||||
if data_loop_results:
|
||||
result_dict["data_loop_results"] = data_loop_results
|
||||
|
||||
logger.info(
|
||||
"Exécution DAG terminée : success=%s, durée=%.2fs",
|
||||
result.success,
|
||||
@@ -279,7 +667,7 @@ def execute_dag(workflow_id: str):
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'execution': result.to_dict(),
|
||||
'execution': result_dict,
|
||||
})
|
||||
|
||||
except ValueError as e:
|
||||
|
||||
@@ -315,6 +315,23 @@ VWB_ACTION_CONTRACTS: Dict[str, ActionContract] = {
|
||||
param_validators={"visual_anchor": lambda p: has_visual_anchor({"visual_anchor": p})}
|
||||
),
|
||||
|
||||
# --- BOUCLE DONNÉES (Data Loop) ---
|
||||
"import_excel": ActionContract(
|
||||
action_type="import_excel",
|
||||
description="Importer un fichier Excel dans la base SQLite",
|
||||
required_params=["file_path"],
|
||||
optional_params=["table_name", "sheet_name"],
|
||||
param_validators={"file_path": lambda p: bool(p and isinstance(p, str) and p.strip())}
|
||||
),
|
||||
|
||||
"db_foreach": ActionContract(
|
||||
action_type="db_foreach",
|
||||
description="Boucle sur chaque ligne d'une table et exécute les étapes dépendantes",
|
||||
required_params=["table_name"],
|
||||
optional_params=["where_clause", "order_by", "limit"],
|
||||
param_validators={"table_name": lambda p: bool(p and isinstance(p, str) and p.strip())}
|
||||
),
|
||||
|
||||
# --- ACTIONS DAG LLM — Exécutées via le DAGExecutor ---
|
||||
"llm_analyze": ActionContract(
|
||||
action_type="llm_analyze",
|
||||
|
||||
Reference in New Issue
Block a user