#!/bin/bash
#
# Vocabulary Loading Script for OMOP Data Pipeline
#
# Validates a directory of extracted OMOP vocabulary CSV files and loads them
# into the database by running `omop-pipeline vocab load`. The script does not
# download anything itself; when the directory is missing it prints manual
# download instructions instead.
#
# Environment:
#   VOCAB_DIR  Directory containing the extracted vocabulary bundle
#              (default: ./vocabularies).
#
# Exit status:
#   0  vocabularies were loaded
#   1  vocabulary directory missing, required files missing, or the
#      omop-pipeline command is not installed
#   N  exit status of `omop-pipeline vocab load` when the load itself fails

set -euo pipefail

# Colors for output. Stored as real escape bytes (ANSI-C quoting) so they can
# be emitted through printf '%s' without interpreting backslashes that may be
# present in user-supplied values such as VOCAB_DIR.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m' # No Color

# Configuration
VOCAB_DIR="${VOCAB_DIR:-./vocabularies}"
readonly ATHENA_URL="https://athena.ohdsi.org/"

# Files that must be present in VOCAB_DIR before a load is attempted.
readonly -a REQUIRED_FILES=(
  "CONCEPT.csv"
  "VOCABULARY.csv"
  "DOMAIN.csv"
  "CONCEPT_CLASS.csv"
  "CONCEPT_RELATIONSHIP.csv"
  "RELATIONSHIP.csv"
)

# Print one line wrapped in a color sequence.
# Arguments: $1 - color escape sequence, $2.. - message text
color_line() {
  local color=$1
  shift
  printf '%s%s%s\n' "$color" "$*" "$NC"
}

color_line "$GREEN" "OMOP Vocabulary Loader"
printf '%s\n' "================================"
printf '%s\n' "Vocabulary directory: $VOCAB_DIR"
printf '%s\n' "================================"
printf '\n'

# Check if vocabulary directory exists; if not, explain how to obtain the
# vocabularies. Diagnostics go to stderr so stdout stays clean for callers.
if [[ ! -d "$VOCAB_DIR" ]]; then
  {
    color_line "$YELLOW" "Vocabulary directory not found: $VOCAB_DIR"
    printf '%s\n' \
      "" \
      "To download OMOP vocabularies:" \
      "1. Visit $ATHENA_URL" \
      "2. Select the vocabularies you need" \
      "3. Download the vocabulary bundle" \
      "4. Extract to $VOCAB_DIR" \
      "" \
      "Required vocabularies for basic functionality:" \
      " - SNOMED" \
      " - ICD10CM" \
      " - RxNorm" \
      " - LOINC" \
      " - CPT4" \
      ""
  } >&2
  exit 1
fi

# Check for required vocabulary files
color_line "$YELLOW" "Checking vocabulary files..."

MISSING_FILES=()
for file in "${REQUIRED_FILES[@]}"; do
  if [[ ! -f "$VOCAB_DIR/$file" ]]; then
    MISSING_FILES+=("$file")
  fi
done

if (( ${#MISSING_FILES[@]} > 0 )); then
  {
    color_line "$RED" "Error: Missing required vocabulary files:"
    for file in "${MISSING_FILES[@]}"; do
      printf '%s\n' " - $file"
    done
    printf '\n'
    printf '%s\n' "Please ensure all vocabulary files are extracted to $VOCAB_DIR"
  } >&2
  exit 1
fi

color_line "$GREEN" "✓ All required vocabulary files found"
printf '\n'

# Count records in vocabulary files. One line is subtracted for the CSV header
# row; the result is clamped at 0 so an empty file is not reported as "-1".
color_line "$YELLOW" "Vocabulary file statistics:"
for file in "${REQUIRED_FILES[@]}"; do
  if [[ -f "$VOCAB_DIR/$file" ]]; then
    line_count=$(wc -l < "$VOCAB_DIR/$file")
    record_count=$(( line_count > 0 ? line_count - 1 : 0 ))
    printf '%s\n' " $file: $record_count records"
  fi
done
printf '\n'

# Load vocabularies using Python CLI
color_line "$YELLOW" "Loading vocabularies into database..."
printf '%s\n' "This may take several minutes depending on vocabulary size..."
printf '\n'

if ! command -v omop-pipeline > /dev/null 2>&1; then
  {
    color_line "$RED" "Error: omop-pipeline command not found"
    printf '%s\n' "Please install the package with: pip install -e ."
  } >&2
  exit 1
fi

# Capture the loader's status explicitly so a failure is reported instead of
# the script dying silently under `set -e`; the original status is propagated.
load_status=0
omop-pipeline vocab load --path "$VOCAB_DIR" || load_status=$?
if (( load_status != 0 )); then
  color_line "$RED" \
    "Error: omop-pipeline vocab load failed (exit status $load_status)" >&2
  exit "$load_status"
fi

printf '\n'
color_line "$GREEN" "✓ Vocabularies loaded successfully"

printf '\n'
color_line "$GREEN" "================================"
color_line "$GREEN" "Vocabulary loading completed!"
color_line "$GREEN" "================================"
printf '\n'
printf '%s\n' "You can now run the ETL pipeline:"
printf '%s\n' " omop-pipeline etl run --source staging.raw_patients --target person"
printf '\n'