Initial commit
This commit is contained in:
106
omop/scripts/load_vocabularies.sh
Executable file
106
omop/scripts/load_vocabularies.sh
Executable file
@@ -0,0 +1,106 @@
|
||||
#!/bin/bash
# Vocabulary Loading Script for OMOP Data Pipeline
#
# Verifies that an extracted OMOP vocabulary bundle is present in VOCAB_DIR
# and loads it through the omop-pipeline CLI. Nothing is downloaded by this
# script; when the bundle is missing it prints instructions for fetching it
# from Athena.
#
# Environment:
#   VOCAB_DIR  Directory containing the extracted vocabulary files
#              (default: ./vocabularies).
#   NO_COLOR   When set to a non-empty value, colour escapes are omitted.

# Strict mode: stop on unhandled errors, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Colors for output. The values are backslash escapes, so they must be
# printed with `echo -e` or printf's %b.
if [[ -z "${NO_COLOR:-}" ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m' # No Color
else
  RED=''
  GREEN=''
  YELLOW=''
  NC=''
fi
readonly RED GREEN YELLOW NC

# Configuration
VOCAB_DIR="${VOCAB_DIR:-./vocabularies}"
ATHENA_URL="https://athena.ohdsi.org/"
readonly VOCAB_DIR ATHENA_URL

echo -e "${GREEN}OMOP Vocabulary Loader${NC}"
echo "================================"
printf 'Vocabulary directory: %s\n' "$VOCAB_DIR"
echo "================================"
echo ""

# Check if vocabulary directory exists.
# Without it there is nothing to load, so explain how to obtain the bundle
# and stop with a non-zero status. The guidance goes to stderr because it is
# emitted on the failure path.
if [[ ! -d "$VOCAB_DIR" ]]; then
  {
    # %b expands the colour escapes only; the path is passed through %s so
    # backslashes in it are printed literally.
    printf '%b%s%b\n' "$YELLOW" "Vocabulary directory not found: $VOCAB_DIR" "$NC"
    cat <<EOF

To download OMOP vocabularies:
1. Visit $ATHENA_URL
2. Select the vocabularies you need
3. Download the vocabulary bundle
4. Extract to $VOCAB_DIR

Required vocabularies for basic functionality:
 - SNOMED
 - ICD10CM
 - RxNorm
 - LOINC
 - CPT4

EOF
  } >&2
  exit 1
fi

# Check for required vocabulary files.
# Every name in REQUIRED_FILES must exist as a regular file directly inside
# VOCAB_DIR; all missing names are collected first so they can be reported
# together before exiting.
echo -e "${YELLOW}Checking vocabulary files...${NC}"

REQUIRED_FILES=(
  "CONCEPT.csv"
  "VOCABULARY.csv"
  "DOMAIN.csv"
  "CONCEPT_CLASS.csv"
  "CONCEPT_RELATIONSHIP.csv"
  "RELATIONSHIP.csv"
)

MISSING_FILES=()
for file in "${REQUIRED_FILES[@]}"; do
  [[ -f "$VOCAB_DIR/$file" ]] || MISSING_FILES+=("$file")
done

if (( ${#MISSING_FILES[@]} > 0 )); then
  {
    echo -e "${RED}Error: Missing required vocabulary files:${NC}"
    # printf repeats its format once per array element.
    printf ' - %s\n' "${MISSING_FILES[@]}"
    echo ""
    printf 'Please ensure all vocabulary files are extracted to %s\n' "$VOCAB_DIR"
  } >&2
  exit 1
fi

echo -e "${GREEN}✓ All required vocabulary files found${NC}"
echo ""

# Count records in vocabulary files

#######################################
# Print the number of data records in a file, treating the first line as a
# header row that is not counted.
# Arguments: $1 - path to the file
# Outputs:   the record count on stdout (never negative)
# Returns:   non-zero if the file cannot be read
#######################################
count_records() {
  local path=$1
  local lines
  lines=$(wc -l < "$path") || return
  # wc -l counts newline characters, so a last line without a trailing
  # newline is not included. Command substitution strips a trailing newline,
  # so the result is non-empty only when the final byte is something else.
  if [[ -n "$(tail -c 1 < "$path")" ]]; then
    lines=$((lines + 1))
  fi
  # An empty file has no header to subtract; report 0 rather than -1.
  echo "$(( lines > 0 ? lines - 1 : 0 ))"
}

echo -e "${YELLOW}Vocabulary file statistics:${NC}"
for file in "${REQUIRED_FILES[@]}"; do
  if [[ -f "$VOCAB_DIR/$file" ]]; then
    printf ' %s: %s records\n' "$file" "$(count_records "$VOCAB_DIR/$file")"
  fi
done
echo ""

# Load vocabularies using Python CLI
echo -e "${YELLOW}Loading vocabularies into database...${NC}"
echo "This may take several minutes depending on vocabulary size..."
echo ""

# Guard clause: the load is delegated to the omop-pipeline command, so bail
# out with installation advice when it is not on PATH.
if ! command -v omop-pipeline > /dev/null 2>&1; then
  echo -e "${RED}Error: omop-pipeline command not found${NC}" >&2
  echo "Please install the package with: pip install -e ." >&2
  exit 1
fi

# Capture the status explicitly instead of relying on `set -e`, so that a
# failed load is reported by this script and its exit status is passed on.
load_status=0
omop-pipeline vocab load --path "$VOCAB_DIR" || load_status=$?
if (( load_status != 0 )); then
  echo -e "${RED}Error: vocabulary load failed (exit status ${load_status})${NC}" >&2
  exit "$load_status"
fi

echo ""
echo -e "${GREEN}✓ Vocabularies loaded successfully${NC}"

# Closing banner with a suggested next step.
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}Vocabulary loading completed!${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "You can now run the ETL pipeline:"
echo " omop-pipeline etl run --source staging.raw_patients --target person"
echo ""
Reference in New Issue
Block a user