#!/bin/bash
#
# Load Sample Data Script
#
# This script sets up the database and loads sample data for testing.
# It runs four steps in order: install the faker package, create the
# database schemas, run scripts/generate_sample_data.py, and print row
# counts for the staging tables.
#
# Usage: run from the directory that contains setup.py.

# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "=========================================="
echo "OMOP Sample Data Loading Script"
echo "=========================================="
echo ""

# Colors for output.
# The values are stored as literal backslash sequences; they only become ANSI
# escape codes when printed with `echo -e`.
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color (resets attributes)

# Check if we're in the right directory.
# The presence of setup.py in the current directory is used as the marker;
# the later steps use relative paths (scripts/..., config.yaml).
if [[ ! -f "setup.py" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo -e "${RED}Error: Must be run from omop directory${NC}" >&2
  exit 1
fi

# Step 1: Install dependencies
echo -e "${YELLOW}Step 1: Installing dependencies...${NC}"
# pip exits 0 when the package is already installed, so a non-zero status is a
# real failure (no network, no pip, permissions, ...). Report it accurately
# instead of claiming success, but stay best-effort and continue: the package
# may still be importable from another source.
if pip install faker > /dev/null 2>&1; then
  echo -e "${GREEN}✓ Dependencies installed${NC}"
else
  echo -e "${YELLOW}Warning: 'pip install faker' failed; continuing in case faker is already available${NC}" >&2
fi
echo ""

# Step 2: Create database schemas
echo -e "${YELLOW}Step 2: Creating database schemas...${NC}"
# Best-effort: a failure here does not abort the script, since the command may
# fail simply because the schemas already exist. stderr is discarded as in the
# original script (NOTE(review): presumably to hide "already exists" noise --
# confirm), so the exit status is reported to keep real failures visible.
schema_status=0
python -m src.cli.commands schema create --type all 2>/dev/null || schema_status=$?
if [[ "${schema_status}" -eq 0 ]]; then
  echo -e "${GREEN}✓ Schemas ready${NC}"
else
  echo -e "${YELLOW}Warning: schema creation exited with status ${schema_status}; schemas may already exist${NC}" >&2
fi
echo ""

# Step 3: Generate and load sample data
echo -e "${YELLOW}Step 3: Generating and loading sample data...${NC}"
# Not guarded: if the generator exits non-zero, `set -e` stops the script
# here and the success line below is never printed.
python scripts/generate_sample_data.py
echo -e "${GREEN}✓ Sample data loaded${NC}"
echo ""

# Step 4: Verify data
#
# Runs an inline Python snippet that loads config.yaml, opens a database
# session and prints the row count of each staging table.
# The snippet is passed inside shell double quotes, so it must not contain
# `$`, backticks or double quotes; Python strings use single quotes only.
echo -e "${YELLOW}Step 4: Verifying loaded data...${NC}"
python -c "
from src.utils.config import Config
from src.utils.db_connection import DatabaseConnection
from sqlalchemy import text

config = Config.load('config.yaml')
db = DatabaseConnection(config)

with db.get_session() as session:
    # Count records in staging tables
    tables = ['raw_patients', 'raw_visits', 'raw_conditions', 'raw_drugs']

    print('\nStaging Table Counts:')
    print('-' * 40)
    for table in tables:
        # Table names come from the fixed list above, not from user input.
        query = text(f'SELECT COUNT(*) FROM staging.{table}')
        count = session.execute(query).fetchone()[0]
        print(f' staging.{table:20s}: {count:5d} records')
    print('-' * 40)
"
echo -e "${GREEN}✓ Data verification complete${NC}"
echo ""

# Closing banner and follow-up instructions.
echo "=========================================="
echo -e "${GREEN}Sample data loading complete!${NC}"
echo "=========================================="
# One printf call emits the remaining lines; each argument is one output line,
# and "" produces a blank line.
printf '%s\n' \
  "" \
  "Next steps:" \
  " 1. Run ETL pipeline:" \
  " omop-pipeline etl run --source staging.raw_patients --target person" \
  "" \
  " 2. View statistics:" \
  " omop-pipeline stats show" \
  "" \
  " 3. Validate data:" \
  " omop-pipeline validate" \
  ""