Initial commit

This commit is contained in:
Dom
2026-03-05 01:20:15 +01:00
commit c0c50e56f0
364 changed files with 62207 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
#!/bin/bash
# Load Sample Data Script
# This script sets up the database and loads sample data for testing.
#
# Usage: run it from the omop project root (the directory holding setup.py).

# Strict mode: stop on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "=========================================="
echo "OMOP Sample Data Loading Script"
echo "=========================================="
echo ""

# Colors for output: ANSI escape sequences, interpreted by `echo -e`.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color
readonly GREEN YELLOW RED NC

# Check if we're in the right directory. setup.py is used as the marker for
# the project root; the error is a diagnostic, so it is written to stderr.
if [[ ! -f "setup.py" ]]; then
  echo -e "${RED}Error: Must be run from omop directory${NC}" >&2
  exit 1
fi
# Step 1: Install dependencies
# Installs the faker package unless it can already be imported.
# NOTE(review): faker is presumably needed by the sample-data generator run
# later in this script - confirm.
echo -e "${YELLOW}Step 1: Installing dependencies...${NC}"
if python -c 'import faker' > /dev/null 2>&1; then
  # Only claim "already installed" when the import really succeeds.
  echo "Faker already installed"
elif ! python -m pip install --quiet faker; then
  # `python -m pip` installs into the same interpreter the later `python`
  # calls use. --quiet hides progress output but keeps error messages.
  echo -e "${RED}Error: failed to install faker${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ Dependencies installed${NC}"
echo ""
# Step 2: Create database schemas
# Best-effort: a failure does not abort the script (the schemas may already
# exist). The command's stderr is captured and printed on failure so that a
# genuine problem can be told apart from "already exists".
echo -e "${YELLOW}Step 2: Creating database schemas...${NC}"
schema_err=""
# fd 3 is a duplicate of stdout: the command's stdout still reaches the
# caller's stdout, while only its stderr lands in schema_err.
if ! { schema_err=$(python -m src.cli.commands schema create --type all 2>&1 1>&3 3>&-); } 3>&1; then
  echo "Schemas may already exist"
  if [[ -n "${schema_err}" ]]; then
    echo -e "${YELLOW}Schema creation reported:${NC}" >&2
    printf '%s\n' "${schema_err}" >&2
  fi
fi
echo -e "${GREEN}✓ Schemas ready${NC}"
echo ""
# Step 3: Generate and load sample data
# Runs the generator script from the current directory. Because the script
# runs under `set -e`, a non-zero exit from the generator stops everything here.
printf '%b\n' "${YELLOW}Step 3: Generating and loading sample data...${NC}"
python scripts/generate_sample_data.py
printf '%b\n' "${GREEN}✓ Sample data loaded${NC}"
printf '\n'
# Step 4: Verify data
# Prints the row count of each staging table using the project's own
# configuration and database-connection helpers.
echo -e "${YELLOW}Step 4: Verifying loaded data...${NC}"
# The program is passed on stdin through a quoted heredoc, so the shell does
# no expansion on it and Python receives the text exactly as written.
python - <<'PYTHON'
from src.utils.config import Config
from src.utils.db_connection import DatabaseConnection
from sqlalchemy import text

config = Config.load('config.yaml')
db = DatabaseConnection(config)

with db.get_session() as session:
    # Count records in staging tables
    tables = ['raw_patients', 'raw_visits', 'raw_conditions', 'raw_drugs']
    print('\nStaging Table Counts:')
    print('-' * 40)
    for table in tables:
        # Table names come only from the fixed list above, so building the
        # statement with an f-string does not expose it to external input.
        query = text(f'SELECT COUNT(*) FROM staging.{table}')
        count = session.execute(query).fetchone()[0]
        print(f' staging.{table:20s}: {count:5d} records')
    print('-' * 40)
PYTHON
echo -e "${GREEN}✓ Data verification complete${NC}"
echo ""
# Closing banner followed by suggested follow-up commands. Output only.
printf '%s\n' "=========================================="
printf '%b\n' "${GREEN}Sample data loading complete!${NC}"
printf '%s\n' "=========================================="
# One argument per output line; empty arguments produce the blank lines.
printf '%s\n' \
  "" \
  "Next steps:" \
  " 1. Run ETL pipeline:" \
  " omop-pipeline etl run --source staging.raw_patients --target person" \
  "" \
  " 2. View statistics:" \
  " omop-pipeline stats show" \
  "" \
  " 3. Validate data:" \
  " omop-pipeline validate" \
  ""