Files
aivanov_database/omop/scripts/setup_database.sh
2026-03-05 01:20:15 +01:00

92 lines
3.3 KiB
Bash
Executable File

#!/bin/bash
# Database Setup Script for OMOP Data Pipeline
# This script creates the database and schemas for the OMOP pipeline
set -e # Exit on error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Configuration (can be overridden by environment variables)
DB_HOST="${DB_HOST:-localhost}"
DB_PORT="${DB_PORT:-5432}"
DB_NAME="${DB_NAME:-omop_db}"
DB_USER="${DB_USER:-postgres}"
DB_PASSWORD="${DB_PASSWORD:-}"
ADMIN_USER="${ADMIN_USER:-postgres}"
echo -e "${GREEN}OMOP Database Setup${NC}"
echo "================================"
echo "Host: $DB_HOST"
echo "Port: $DB_PORT"
echo "Database: $DB_NAME"
echo "User: $DB_USER"
echo "================================"
echo ""
# Check if PostgreSQL is running
echo -e "${YELLOW}Checking PostgreSQL connection...${NC}"
if ! pg_isready -h "$DB_HOST" -p "$DB_PORT" > /dev/null 2>&1; then
echo -e "${RED}Error: Cannot connect to PostgreSQL at $DB_HOST:$DB_PORT${NC}"
echo "Please ensure PostgreSQL is running and accessible."
exit 1
fi
echo -e "${GREEN}✓ PostgreSQL is running${NC}"
echo ""
# Create database if it doesn't exist
echo -e "${YELLOW}Creating database...${NC}"
if PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$ADMIN_USER" -lqt | cut -d \| -f 1 | grep -qw "$DB_NAME"; then
echo -e "${YELLOW}Database $DB_NAME already exists${NC}"
else
PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$ADMIN_USER" -c "CREATE DATABASE $DB_NAME;"
echo -e "${GREEN}✓ Database $DB_NAME created${NC}"
fi
echo ""
# Create user if it doesn't exist
echo -e "${YELLOW}Creating database user...${NC}"
if PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$ADMIN_USER" -d "$DB_NAME" -tAc "SELECT 1 FROM pg_roles WHERE rolname='$DB_USER'" | grep -q 1; then
echo -e "${YELLOW}User $DB_USER already exists${NC}"
else
PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$ADMIN_USER" -d "$DB_NAME" -c "CREATE USER $DB_USER WITH PASSWORD '$DB_PASSWORD';"
echo -e "${GREEN}✓ User $DB_USER created${NC}"
fi
echo ""
# Grant privileges
echo -e "${YELLOW}Granting privileges...${NC}"
PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$ADMIN_USER" -d "$DB_NAME" <<EOF
GRANT ALL PRIVILEGES ON DATABASE $DB_NAME TO $DB_USER;
GRANT ALL ON SCHEMA public TO $DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO $DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO $DB_USER;
EOF
echo -e "${GREEN}✓ Privileges granted${NC}"
echo ""
# Create schemas using the Python CLI
echo -e "${YELLOW}Creating OMOP schemas...${NC}"
if command -v omop-pipeline &> /dev/null; then
omop-pipeline schema create --type all
echo -e "${GREEN}✓ OMOP schemas created${NC}"
else
echo -e "${YELLOW}Warning: omop-pipeline command not found${NC}"
echo "Please install the package with: pip install -e ."
echo "Then run: omop-pipeline schema create --type all"
fi
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}Database setup completed!${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "Next steps:"
echo "1. Load vocabularies: omop-pipeline vocab load --path /path/to/vocabularies"
echo "2. Load staging data into staging tables"
echo "3. Run ETL: omop-pipeline etl run --source staging.raw_patients --target person"
echo ""