Initial commit
This commit is contained in:
80
omop/scripts/load_sample_data.sh
Executable file
80
omop/scripts/load_sample_data.sh
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/bin/bash
# Load Sample Data Script
#
# This script sets up the database and loads sample data for testing.
# Run it from the omop directory (the directory that contains setup.py).

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "=========================================="
echo "OMOP Sample Data Loading Script"
echo "=========================================="
echo ""

# Colors for output. The values hold a literal backslash sequence, so they
# only turn into ANSI escapes when printed with `echo -e` (or printf '%b').
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color
|
||||
|
||||
# Check if we're in the right directory.
# setup.py is used as the marker for the omop directory; the later steps refer
# to scripts/ and config.yaml relative to the current directory, so stop early
# when the marker is missing.
if [[ ! -f "setup.py" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo -e "${RED}Error: Must be run from omop directory${NC}" >&2
  exit 1
fi
|
||||
|
||||
# Step 1: Install dependencies
echo -e "${YELLOW}Step 1: Installing dependencies...${NC}"
# pip exits 0 when a requirement is already satisfied, so a non-zero status is
# a real failure and must not be reported as "already installed". Probe for the
# module first, then install only when it is missing.
if python -c "import faker" > /dev/null 2>&1; then
  echo "Faker already installed"
else
  # `python -m pip` installs into the same interpreter that the later steps
  # run with `python`. Output is captured and shown only if the install fails.
  if ! pip_log=$(python -m pip install faker 2>&1); then
    echo -e "${RED}Error: failed to install faker${NC}" >&2
    printf '%s\n' "${pip_log}" >&2
    exit 1
  fi
fi
echo -e "${GREEN}✓ Dependencies installed${NC}"
echo ""
|
||||
|
||||
# Step 2: Create database schemas
echo -e "${YELLOW}Step 2: Creating database schemas...${NC}"
# Schema creation stays best-effort: a failure does not stop the script.
# Instead of discarding stderr, capture it and show it when the command fails,
# so a genuine problem is not hidden behind the "may already exist" message.
# fd 3 is a copy of the real stdout: inside the substitution stderr is routed
# into the capture (2>&1) and stdout is sent back to the terminal (1>&3).
if ! { schema_err=$(python -m src.cli.commands schema create --type all 2>&1 1>&3); } 3>&1; then
  echo "Schemas may already exist"
  if [[ -n "${schema_err}" ]]; then
    printf '%s\n' "${schema_err}" >&2
  fi
fi
echo -e "${GREEN}✓ Schemas ready${NC}"
echo ""
|
||||
|
||||
# Step 3: Generate and load sample data
echo -e "${YELLOW}Step 3: Generating and loading sample data...${NC}"
# Record the generator's exit status instead of letting `set -e` end the script
# without a message; the same status is passed on to the caller.
gen_status=0
python scripts/generate_sample_data.py || gen_status=$?
if (( gen_status != 0 )); then
  echo -e "${RED}Error: sample data generation failed (exit ${gen_status})${NC}" >&2
  exit "${gen_status}"
fi
echo -e "${GREEN}✓ Sample data loaded${NC}"
echo ""
|
||||
|
||||
# Step 4: Verify data
echo -e "${YELLOW}Step 4: Verifying loaded data...${NC}"
# Inline Python that prints a row count for each staging table. The program is
# passed in double quotes; it contains no \$ or backticks, so the shell hands
# it to Python unchanged (including the \n escape, which Python interprets).
python -c "
from src.utils.config import Config
from src.utils.db_connection import DatabaseConnection
from sqlalchemy import text

config = Config.load('config.yaml')
db = DatabaseConnection(config)

with db.get_session() as session:
    # Count records in staging tables
    tables = ['raw_patients', 'raw_visits', 'raw_conditions', 'raw_drugs']

    print('\nStaging Table Counts:')
    print('-' * 40)
    for table in tables:
        # Table names come from the fixed list above, never from user input.
        query = text(f'SELECT COUNT(*) FROM staging.{table}')
        count = session.execute(query).fetchone()[0]
        print(f' staging.{table:20s}: {count:5d} records')
    print('-' * 40)
"
echo -e "${GREEN}✓ Data verification complete${NC}"
echo ""
|
||||
|
||||
# Closing banner followed by suggested follow-up commands.
echo "=========================================="
echo -e "${GREEN}Sample data loading complete!${NC}"
echo "=========================================="

# The hint text is static, so it is emitted from a quoted here-doc: nothing
# inside it is expanded and the lines are printed exactly as written.
cat <<'EOF'

Next steps:
 1. Run ETL pipeline:
 omop-pipeline etl run --source staging.raw_patients --target person

 2. View statistics:
 omop-pipeline stats show

 3. Validate data:
 omop-pipeline validate

EOF
|
||||
Reference in New Issue
Block a user