#!/bin/bash
#
# Load Sample Data Script
#
# Sets up the database and loads sample data for testing:
#   1. makes sure the `faker` Python package is available,
#   2. creates the database schemas (best-effort),
#   3. runs scripts/generate_sample_data.py,
#   4. prints the record count of each staging table.
#
# Usage: run from the omop directory (the one that contains setup.py).
# The script takes no arguments. It exits non-zero as soon as a required
# step fails.

set -euo pipefail

# Colors for output. $'...' stores the real escape bytes, so they can be
# printed with a plain '%s' format instead of relying on `echo -e`.
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly RED=$'\033[0;31m'
readonly NC=$'\033[0m' # No Color

# Print a step heading in yellow.
info() { printf '%s%s%s\n' "${YELLOW}" "$*" "${NC}"; }

# Print a success line in green.
ok() { printf '%s%s%s\n' "${GREEN}" "$*" "${NC}"; }

# Print an error in red on stderr and abort the script.
die() {
  printf '%sError: %s%s\n' "${RED}" "$*" "${NC}" >&2
  exit 1
}

echo "=========================================="
echo "OMOP Sample Data Loading Script"
echo "=========================================="
echo ""

# Check if we're in the right directory: the commands below use paths and
# module names relative to the current directory.
if [[ ! -f "setup.py" ]]; then
  die "Must be run from omop directory"
fi

# Step 1: Install dependencies
info "Step 1: Installing dependencies..."
# pip exits 0 when the requirement is already satisfied, so a non-zero
# status is a real failure and must not be reported as "already installed".
# Probing the import first also keeps the script usable when faker is
# present but pip itself is unavailable.
if python -c 'import faker' > /dev/null 2>&1; then
  echo "Faker already installed"
elif ! pip install faker > /dev/null 2>&1; then
  die "Failed to install faker (re-run 'pip install faker' to see details)"
fi
ok "✓ Dependencies installed"
echo ""

# Step 2: Create database schemas
info "Step 2: Creating database schemas..."
# Intentionally best-effort: a failure here is tolerated and its stderr is
# discarded, presumably because the command fails when the schemas already
# exist (NOTE(review): confirm against the CLI's behavior).
python -m src.cli.commands schema create --type all 2>/dev/null \
  || echo "Schemas may already exist"
ok "✓ Schemas ready"
echo ""

# Step 3: Generate and load sample data
info "Step 3: Generating and loading sample data..."
python scripts/generate_sample_data.py
ok "✓ Sample data loaded"
echo ""

# Step 4: Verify data
info "Step 4: Verifying loaded data..."
# The delimiter is quoted so the shell passes the Python source through
# verbatim (no $-expansion or backslash processing).
python - <<'PYTHON'
from src.utils.config import Config
from src.utils.db_connection import DatabaseConnection
from sqlalchemy import text

config = Config.load('config.yaml')
db = DatabaseConnection(config)

with db.get_session() as session:
    # Count records in staging tables
    tables = ['raw_patients', 'raw_visits', 'raw_conditions', 'raw_drugs']
    print('\nStaging Table Counts:')
    print('-' * 40)
    for table in tables:
        # Table names come from the fixed list above, never from user input.
        query = text(f'SELECT COUNT(*) FROM staging.{table}')
        count = session.execute(query).fetchone()[0]
        print(f' staging.{table:20s}: {count:5d} records')
    print('-' * 40)
PYTHON
ok "✓ Data verification complete"
echo ""

echo "=========================================="
ok "Sample data loading complete!"
echo "=========================================="
echo ""
echo "Next steps:"
echo " 1. Run ETL pipeline:"
echo " omop-pipeline etl run --source staging.raw_patients --target person"
echo ""
echo " 2. View statistics:"
echo " omop-pipeline stats show"
echo ""
echo " 3. Validate data:"
echo " omop-pipeline validate"
echo ""