Six modifications structurelles côté serveur, non destructives, aboutissant à un
pipeline replay bien plus stable pour la démo GHT Sud 95 (Urgences UHCD).
1. visual_workflow_builder/backend/app.py
load_dotenv() chargeait .env (cwd) au lieu de .env.local racine projet.
Conséquence : RPA_API_TOKEN absent après chaque restart manuel du backend
et tous les proxies VWB→streaming échouaient en 401 « Token API invalide ».
Charge maintenant explicitement .env.local du project root.
2. visual_workflow_builder/backend/api_v3/learned_workflows.py
Quatre appels proxy /api/v1/traces/stream/* ne portaient pas le Bearer.
Helper _stream_headers() factorisé et appliqué (workflows list/detail,
workflow detail, reload-workflows).
3. visual_workflow_builder/backend/api_v3/dag_execute.py
_ANCHOR_CLICK_TYPES excluait type_text/type_secret : pas de pre-click de
focus avant la frappe → texte tapé sans focus → textareas vides au replay.
Helper _inject_anchor_targeting() factorisé (centre bbox + visual_mode +
target_spec) appliqué aux click_anchor* ET aux type_text/type_secret dès
qu'un anchor_id est présent. Workflows historiques sans anchor sur
type_text → comportement inchangé.
4. agent_v0/server_v1/api_stream.py — endpoint /replay/next
_replay_lock (threading.Lock global) tenu pendant les actions serveur
lentes (extract_text OCR ~5s, t2a_decision LLM ~8-13s). Comme le handler
est async def, l'event loop FastAPI était bloqué : les polls clients
timeout à 5s, leurs actions étaient popped serveur sans destinataire,
perdues silencieusement. Mesure : 8 actions/25 perdues sur replay Urgence.
acquire(timeout=4.5) puis run_in_executor pour libérer l'event loop
pendant l'attente du lock ET pendant les handlers serveur synchrones.
Pendant un t2a_decision en cours, les polls concurrents reçoivent
immédiatement {action: null, server_busy: true} → l'agent ne timeout
plus, aucune action n'est popped sans destinataire.
5. agent_v0/server_v1/resolve_engine.py — _validate_resolution_quality
Drift > 0.20 par rapport aux coords enregistrées → fallback aux coords
enregistrées même quand le template matching trouve l'image avec un
score quasi parfait. Or un score >= 0.95 signifie que l'image EST
visuellement à l'écran à l'endroit indiqué, le drift reflète juste
un changement de layout (scroll, F11, redimensionnement), pas une
erreur. Exception ajoutée : score >= 0.95 sur template_matching →
ignore drift check, utilise position visuelle.
6. core/llm/t2a_decision.py — prompt T2A/PMSI
Ancien prompt autorisait « Critère non validé » en fallback creux.
Nouveau prompt impose au moins une CITATION LITTÉRALE entre « ... »
du DPI dans chaque preuve_critereN, qu'elle soutienne ou infirme le
critère. Si non validé : factualisation explicite (« Aucune ... »,
« Sortie à H+2 ») citée du dossier. Sortie = preuves cliniques
traçables et professionnelles, pas du remplissage.
État DB : aucun changement net (bbox patchés puis revertés depuis backup
visual_anchors_backup_20260501 ; by_text re-aligné sur 25003284). Le
re-enregistrement du workflow Urgence en conditions bureau standard
(Chrome normal, taille fenêtre standard) est l'étape suivante côté Dom.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
446 lines
17 KiB
Python
"""
|
||
Visual Workflow Builder - Backend Flask Application
|
||
|
||
This is the main entry point for the Visual Workflow Builder backend API.
|
||
It provides REST endpoints for workflow management and WebSocket support
|
||
for real-time execution updates.
|
||
"""
|
||
|
||
from flask import Flask
|
||
from flask_cors import CORS
|
||
from flask_socketio import SocketIO
|
||
from flask_caching import Cache
|
||
from flask_migrate import Migrate
|
||
import os
|
||
import logging
|
||
from pathlib import Path
|
||
from logging.handlers import RotatingFileHandler
|
||
from dotenv import load_dotenv
|
||
|
||
# Load .env.local from the project root BEFORE anything else: it carries the
# RPA_API_TOKEN used by the VWB -> streaming-server proxy. Without it the
# token is missing after every manual backend restart and every proxied call
# fails with 401 « Token API invalide ».
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_PROJECT_ROOT / '.env.local')
load_dotenv()  # fallback: .env from the cwd (never overrides vars already set)
|
||
|
||
# Initialize Flask app
app = Flask(__name__)

# ============================================================
# Logging — rotating file + console (idempotent)
# ============================================================
# WARNING: this module can be imported twice (once as __main__ via
# `python app.py`, then again as module `app` via `from app import socketio`
# in api/websocket_handlers.py). Without the idempotence guard below the
# RotatingFileHandler would be attached twice to the root logger and every
# logged line would appear duplicated.
_log_dir = os.path.join(os.path.dirname(__file__), 'logs')
os.makedirs(_log_dir, exist_ok=True)
_LOG_FILE_PATH = os.path.abspath(os.path.join(_log_dir, 'vwb.log'))

# Logging counts as "already configured" when a RotatingFileHandler pointing
# at our exact log file is installed on the root logger.
_root_logger = logging.getLogger()
_already_configured = any(
    isinstance(h, RotatingFileHandler)
    and os.path.abspath(getattr(h, 'baseFilename', '')) == _LOG_FILE_PATH
    for h in _root_logger.handlers
)

if not _already_configured:
    _file_handler = RotatingFileHandler(
        _LOG_FILE_PATH,
        maxBytes=5 * 1024 * 1024,  # 5 MB per file before rotation
        backupCount=3
    )
    _file_handler.setLevel(logging.INFO)
    _file_handler.setFormatter(logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
    ))
    _root_logger.addHandler(_file_handler)
    _root_logger.setLevel(logging.INFO)
|
||
|
||
# ------------------------------------------------------------
# Configuration (env-driven, with safe development defaults)
# ------------------------------------------------------------
import secrets as _secrets

_has_redis = bool(os.getenv('REDIS_URL'))
app.config.update(
    # Random per-process key unless explicitly pinned via SECRET_KEY.
    SECRET_KEY=os.getenv('SECRET_KEY', _secrets.token_hex(32)),
    SQLALCHEMY_DATABASE_URI=os.getenv('DATABASE_URL', 'sqlite:///workflows.db'),
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
    MAX_CONTENT_LENGTH=10 * 1024 * 1024,  # 10MB max upload
    # In-memory cache unless a Redis instance is configured.
    CACHE_TYPE='redis' if _has_redis else 'simple',
    CACHE_REDIS_URL=os.getenv('REDIS_URL', 'redis://localhost:6379/0'),
)
|
||
|
||
# Initialize extensions — use the db instance from the v3 models package,
# which is the single source of truth for the SQLAlchemy metadata.
from db.models import db
db.init_app(app)

# Flask-Migrate wires Alembic migrations onto this app/db pair.
migrate = Migrate(app, db)

# Cache backend chosen above (Redis when REDIS_URL is set, else in-memory).
cache = Cache(app)
|
||
# Origins allowed to open Socket.IO connections (local dev + lab domains).
_ALLOWED_ORIGINS = [
    "http://localhost:3002",
    "http://localhost:5002",
    "https://vwb.labs.laurinebazin.design",
    "https://lea.labs.laurinebazin.design",
]

# Verbose engine logging is kept on to ease debugging of realtime sessions.
socketio = SocketIO(
    app,
    cors_allowed_origins=_ALLOWED_ORIGINS,
    async_mode='threading',
    logger=True,
    engineio_logger=True
)

# REST CORS — allow every local dev port by default, overridable via env.
_DEFAULT_CORS = 'http://localhost:3000,http://localhost:3001,http://localhost:3002,http://localhost:3003,http://localhost:3004,http://localhost:5173'
_api_cors_rules = {
    "origins": os.getenv('CORS_ORIGINS', _DEFAULT_CORS).split(','),
    "methods": ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    "allow_headers": ["Content-Type", "Authorization"],
}
CORS(app, resources={r"/api/*": _api_cors_rules})
|
||
|
||
# Import and register the always-on blueprints (minimal set).
from api.workflows import workflows_bp
from api.screen_capture import screen_capture_bp
from api.real_demo import real_demo_bp
from api.errors import error_response

app.register_blueprint(workflows_bp, url_prefix='/api/workflows')
app.register_blueprint(screen_capture_bp, url_prefix='/api/screen-capture')
# real_demo_bp is registered without a prefix; its routes are defined inside
# the blueprint itself.
app.register_blueprint(real_demo_bp)
|
||
|
||
# Optional / Phase 2+ blueprints (loaded only if their modules import).
# The *_AVAILABLE flags and init_* callables are exported at module level so
# the visual-services initialization further down can tell what to wire up.
try:
    from api.self_healing import self_healing_bp
    app.register_blueprint(self_healing_bp)
except ImportError as e:
    print(f"⚠️ Blueprint self_healing désactivé: {e}")

try:
    from api.visual_targets import visual_targets_bp, init_visual_target_manager
    app.register_blueprint(visual_targets_bp)
    VISUAL_TARGETS_BP_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ Blueprint visual_targets désactivé: {e}")
    VISUAL_TARGETS_BP_AVAILABLE = False
    init_visual_target_manager = None

try:
    from api.element_detection import element_detection_bp, init_element_detection
    app.register_blueprint(element_detection_bp)
    ELEMENT_DETECTION_BP_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ Blueprint element_detection désactivé: {e}")
    ELEMENT_DETECTION_BP_AVAILABLE = False
    init_element_detection = None

try:
    from api.analytics import analytics_bp
    app.register_blueprint(analytics_bp, url_prefix='/api/analytics')
except ImportError as e:
    # Consistency fix: this failure used to be swallowed silently, unlike
    # every other optional blueprint above — log it the same way.
    print(f"⚠️ Blueprint analytics désactivé: {e}")
|
||
|
||
|
||
# Register other blueprints (optional — depend on Phase 2+ services).
try:
    from api.templates import templates_bp
    app.register_blueprint(templates_bp, url_prefix='/api/templates')
except ImportError as e:
    print(f"⚠️ Blueprint templates désactivé: {e}")

# node_types is registered unconditionally (no import guard).
from api.node_types import node_types_bp
app.register_blueprint(node_types_bp, url_prefix='/api/node-types')

try:
    from api.executions import executions_bp
    app.register_blueprint(executions_bp, url_prefix='/api/executions')
except ImportError as e:
    print(f"⚠️ Blueprint executions désactivé: {e}")

try:
    from api.import_export import import_export_bp
    app.register_blueprint(import_export_bp, url_prefix='/api')
except ImportError as e:
    print(f"⚠️ Blueprint import_export désactivé: {e}")

try:
    from api.correction_packs import correction_packs_bp
    app.register_blueprint(correction_packs_bp, url_prefix='/api')
    print("✅ Blueprint correction_packs enregistré")
except ImportError as e:
    print(f"⚠️ Blueprint correction_packs désactivé: {e}")

try:
    from api.coaching_sessions import coaching_sessions_bp
    app.register_blueprint(coaching_sessions_bp, url_prefix='/api/coaching-sessions')
    print("✅ Blueprint coaching_sessions enregistré")
except ImportError as e:
    print(f"⚠️ Blueprint coaching_sessions désactivé: {e}")
|
||
|
||
# VWB catalog — VisionOnly actions.
# V2 uses a VLM (Vision Language Model) for intelligent detection.
try:
    from catalog_routes_v2_vlm import catalog_bp, VLM_MODEL
    app.register_blueprint(catalog_bp)
    print(f"✅ Blueprint catalog V2 VLM (Ollama {VLM_MODEL}) enregistré")
except ImportError as e:
    print(f"⚠️ Blueprint catalog V2 VLM désactivé: {e}")

# Visual-anchor images API — server-side storage.
try:
    from api.anchor_images import anchor_images_bp
    app.register_blueprint(anchor_images_bp)
    print("✅ Blueprint anchor_images enregistré")
except ImportError as e:
    print(f"⚠️ Blueprint anchor_images désactivé: {e}")

# UI detection API — UI-DETR-1.
try:
    from api.ui_detection import ui_detection_bp
    app.register_blueprint(ui_detection_bp)
    print("✅ Blueprint ui_detection (UI-DETR-1) enregistré - /api/ui-detection/*")
except ImportError as e:
    print(f"⚠️ Blueprint ui_detection désactivé: {e}")
|
||
|
||
# ============================================================
# API V3 — Thin Client Architecture (single source of truth)
# ============================================================
try:
    from api_v3 import api_v3_bp
    app.register_blueprint(api_v3_bp)
    print("✅ Blueprint API v3 (Thin Client) enregistré - /api/v3/*")
except ImportError as e:
    print(f"⚠️ Blueprint API v3 désactivé: {e}")


# Import WebSocket handlers for their registration side effects (optional).
# The broad Exception catch is a deliberate best-effort: a failure here must
# not prevent the HTTP API from starting.
try:
    from api import websocket_handlers  # noqa: F401
except Exception as e:
    print(f"⚠️ WebSocket handlers désactivés: {e}")
|
||
|
||
# ============================================================
|
||
# Headers de sécurité (sécurité HIGH)
|
||
# ============================================================
|
||
# ============================================================
# Security headers (HIGH-severity hardening)
# ============================================================
@app.after_request
def set_security_headers(response):
    """Attach the standard security headers to every outgoing response."""
    # CSP kept permissive enough for the SPA: inline/eval scripts, data/blob
    # images, websockets, and the two lab domains.
    csp = (
        "default-src 'self'; "
        "script-src 'self' 'unsafe-inline' 'unsafe-eval'; "
        "style-src 'self' 'unsafe-inline'; "
        "img-src 'self' data: blob:; "
        "connect-src 'self' ws: wss: http://localhost:* https://vwb.labs.laurinebazin.design https://lea.labs.laurinebazin.design; "
        "font-src 'self' data:; "
        "frame-ancestors 'self'"
    )
    hardening = (
        ('X-Content-Type-Options', 'nosniff'),
        ('X-Frame-Options', 'SAMEORIGIN'),
        ('X-XSS-Protection', '1; mode=block'),
        ('Referrer-Policy', 'strict-origin-when-cross-origin'),
        ('Content-Security-Policy', csp),
    )
    for header, value in hardening:
        response.headers[header] = value
    return response
|
||
|
||
|
||
# Global error handlers — all funnel through api.errors.error_response so
# clients always receive the same JSON error envelope.
@app.errorhandler(404)
def not_found(error):
    """Handle 404 errors."""
    return error_response(404, "Resource not found")

@app.errorhandler(405)
def method_not_allowed(error):
    """Handle 405 errors."""
    return error_response(405, "Method not allowed")

@app.errorhandler(500)
def internal_error(error):
    """Handle 500 errors."""
    return error_response(500, "Internal server error")

@app.errorhandler(Exception)
def handle_exception(error):
    """Catch-all for unhandled exceptions: print the traceback, answer 500."""
    import traceback
    traceback.print_exc()
    return error_response(500, f"Unexpected error: {str(error)}")
|
||
|
||
# Health check endpoint
@app.route('/health')
@app.route('/api/health')
def health_check():
    """Liveness probe for monitoring — static status/version payload."""
    from flask import jsonify
    payload = {'status': 'healthy', 'version': '1.0.0'}
    return jsonify(payload)
|
||
|
||
# Workflow execution endpoint (proxy to the catalog execute endpoint)
@app.route('/api/workflow/execute-step', methods=['POST'])
def execute_workflow_step():
    """Execute a single workflow step through the VLM catalog.

    Accepts JSON {stepId, stepType, parameters}, forwards it to the catalog
    ``execute_action`` view inside a simulated request context (no HTTP
    round-trip), and normalizes the catalog response to
    ``{success, output, error}``.
    """
    from flask import jsonify, request
    import json
    import time

    try:
        data = request.get_json() or {}
        # Fallback step id: millisecond timestamp — unique enough for logging.
        step_id = data.get('stepId', f'step_{int(time.time() * 1000)}')
        step_type = data.get('stepType', 'click_anchor')
        parameters = data.get('parameters', {})

        # Convert to the catalog execute format.
        catalog_request = {
            'type': step_type,
            'step_id': step_id,
            'parameters': parameters
        }

        try:
            # Call the catalog view function directly; a test request context
            # stands in for a real POST to /api/vwb/catalog/execute.
            from catalog_routes_v2_vlm import execute_action as catalog_execute
            from flask import current_app
            with current_app.test_request_context(
                '/api/vwb/catalog/execute',
                method='POST',
                data=json.dumps(catalog_request),
                content_type='application/json'
            ):
                response = catalog_execute()
                if hasattr(response, 'get_json'):
                    result = response.get_json()
                else:
                    # (body, status) tuple form returned by the view.
                    result = json.loads(response[0].get_data(as_text=True))

            # Normalize the catalog result to the shape the frontend expects.
            if result.get('success') and result.get('result'):
                step_result = result['result']
                return jsonify({
                    'success': step_result.get('status') == 'success',
                    'output': step_result.get('output_data', {}),
                    'error': step_result.get('error', {}).get('message') if step_result.get('error') else None
                })
            else:
                return jsonify({
                    'success': False,
                    'error': result.get('error', 'Échec de l\'exécution')
                })
        except Exception as inner_e:
            # Catalog-level failure: report it but keep HTTP 200 so the
            # frontend treats it as a step failure, not a transport error.
            print(f"❌ Erreur exécution interne: {inner_e}")
            return jsonify({
                'success': False,
                'error': str(inner_e)
            })

    except Exception as e:
        print(f"❌ Erreur execute-step: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
|
||
|
||
# Create database tables — only if Alembic migrations are not available.
# In production, use: flask db upgrade
# (Fix: removed the redundant re-import of `os`; it is already imported at
# the top of this module.)
migrations_dir = os.path.join(os.path.dirname(__file__), 'migrations')
with app.app_context():
    if not os.path.exists(migrations_dir):
        # No migrations folder — use create_all for development.
        db.create_all()
        print("✅ [DB] Tables créées avec db.create_all()")
    else:
        # Migrations available — bootstrap only if Alembic never ran here.
        from sqlalchemy import inspect
        inspector = inspect(db.engine)
        if 'alembic_version' not in inspector.get_table_names():
            # First run with migrations — create tables; stamping is manual.
            db.create_all()
            print("✅ [DB] Tables créées, utiliser 'flask db stamp head' pour initialiser les migrations")

    # Manual migration: add the review columns if they don't exist yet.
    from sqlalchemy import inspect as sa_inspect, text
    insp = sa_inspect(db.engine)
    if 'workflows' in insp.get_table_names():
        existing_cols = {col['name'] for col in insp.get_columns('workflows')}
        new_cols = {
            'source': "ALTER TABLE workflows ADD COLUMN source VARCHAR(64) DEFAULT 'manual'",
            'review_status': "ALTER TABLE workflows ADD COLUMN review_status VARCHAR(32)",
            'review_feedback': "ALTER TABLE workflows ADD COLUMN review_feedback TEXT",
            'reviewed_at': "ALTER TABLE workflows ADD COLUMN reviewed_at DATETIME",
        }
        for col_name, sql in new_cols.items():
            if col_name not in existing_cols:
                try:
                    db.session.execute(text(sql))
                    db.session.commit()
                    print(f" [DB] Colonne '{col_name}' ajoutée à workflows")
                except Exception as e:
                    # Most likely the column already exists (concurrent start).
                    db.session.rollback()
                    print(f" [DB] Colonne '{col_name}' déjà existante ou erreur: {e}")

    # Manual migration: add the OCR/VLM columns to the visual anchors.
    if 'visual_anchors' in insp.get_table_names():
        existing_anchor_cols = {col['name'] for col in insp.get_columns('visual_anchors')}
        new_anchor_cols = {
            'target_text': "ALTER TABLE visual_anchors ADD COLUMN target_text TEXT",
            'ocr_description': "ALTER TABLE visual_anchors ADD COLUMN ocr_description TEXT",
        }
        for col_name, sql in new_anchor_cols.items():
            if col_name not in existing_anchor_cols:
                try:
                    db.session.execute(text(sql))
                    db.session.commit()
                    print(f" [DB] Colonne '{col_name}' ajoutée à visual_anchors")
                except Exception as e:
                    db.session.rollback()
                    print(f" [DB] Colonne '{col_name}' déjà existante ou erreur: {e}")
|
||
|
||
# Initialize VisualTargetManager with RPA Vision V3 components (optional).
try:
    from core.capture.screen_capturer import ScreenCapturer
    from core.detection.ui_detector import UIDetector
    from core.embedding.fusion_engine import FusionEngine

    # Only initialize if the related blueprints were actually loaded.
    if VISUAL_TARGETS_BP_AVAILABLE and init_visual_target_manager:
        screen_capturer = ScreenCapturer()
        ui_detector = UIDetector()
        fusion_engine = FusionEngine()
        init_visual_target_manager(screen_capturer, ui_detector, fusion_engine)

    if ELEMENT_DETECTION_BP_AVAILABLE and init_element_detection:
        # Reuse the same instances when possible. NOTE: at module level,
        # locals() is the module namespace, so these checks detect whether
        # the branch above already created the objects.
        if 'ui_detector' not in locals():
            ui_detector = UIDetector()
        if 'screen_capturer' not in locals():
            screen_capturer = ScreenCapturer()
        init_element_detection(ui_detector, screen_capturer)

    if (VISUAL_TARGETS_BP_AVAILABLE and init_visual_target_manager) or (ELEMENT_DETECTION_BP_AVAILABLE and init_element_detection):
        print("✅ Services visuels initialisés (VisualTargets / ElementDetection)")
except ImportError as e:
    # Core RPA package not present at all — visual services stay off.
    print(f"⚠️ Core RPA non disponible pour l'initialisation visuelle: {e}")
except Exception as e:
    # Initialization failed even though the imports succeeded.
    print(f"❌ Erreur lors de l'initialisation des services visuels: {e}")
|
||
|
||
# Pre-load the models to avoid latency on the first call.
# Best-effort: a failed preload only costs latency on the first OCR request.
try:
    from services.ocr_service import preload as ocr_preload
    ocr_preload()
except Exception as e:
    print(f"⚠️ Pré-chargement OCR échoué: {e}")
|
||
|
||
if __name__ == '__main__':
    # Port configurable via the PORT env var; 5002 is the VWB backend default.
    port = int(os.getenv('PORT', 5002))
    # Debug mode disabled to keep the lab deployment stable (no reloader,
    # no interactive debugger).
    debug = False

    socketio.run(
        app,
        host='0.0.0.0',
        port=port,
        debug=debug,  # fix: the local `debug` flag was defined but never used
        use_reloader=False,
        allow_unsafe_werkzeug=True
    )
|