Restructured src into a more logical hierarchy
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
# Snowflake Configuration for NHS Patient Pathway Analysis
|
||||
#
|
||||
# This file contains connection settings for the Snowflake data warehouse.
|
||||
# IMPORTANT: This file should NOT be committed to version control if it contains
|
||||
# sensitive information. With externalbrowser auth no passwords are stored here, although the file still contains an account identifier and a user email address.
|
||||
#
|
||||
# For NHS SSO authentication, the 'externalbrowser' authenticator opens a browser
|
||||
# window for authentication via NHS identity management.
|
||||
|
||||
[connection]
|
||||
# Snowflake account identifier, including region and cloud (e.g., "xy12345.uk-south.azure")
|
||||
# Ask your Snowflake administrator for the correct account identifier
|
||||
account = "ZK91403.uk-south.azure"
|
||||
|
||||
# Default warehouse used to run queries
|
||||
# Common options: ANALYST_WH, COMPUTE_WH (this file currently selects WH__XSMALL)
|
||||
warehouse = "WH__XSMALL"
|
||||
|
||||
# Default database for queries
|
||||
# DATA_HUB is the primary analyst-curated data warehouse
|
||||
database = "DATA_HUB"
|
||||
|
||||
# Default schema within the default database (optional, can be overridden per query)
|
||||
schema = "DWH"
|
||||
|
||||
# Authentication method
|
||||
|
||||
# "externalbrowser" opens a browser window for NHS SSO (required for NHS environments)
|
||||
# Other options: "snowflake" (username/password), "oauth", "okta". NOTE(review): the Snowflake connector normally takes an Okta URL (https://<okta_account>.okta.com) rather than the literal "okta"; confirm how the loader handles this
|
||||
authenticator = "externalbrowser"
|
||||
|
||||
# User principal (email address used for externalbrowser auth)
|
||||
# Leave empty to fall back to the current Windows user or an interactive prompt. NOTE(review): a specific person's address is set below; other users need to change it
|
||||
user = "ANDREW.CHARLWOOD@NHS.NET"
|
||||
|
||||
# Role to use (optional; the default role is used when this is empty)
|
||||
role = ""
|
||||
|
||||
[timeouts]
|
||||
# Network timeout in seconds (how long the client waits for a Snowflake response); 600 = 10 minutes
|
||||
# Must be high enough for GP record lookups, which can take 30-60 s per batch
|
||||
connection_timeout = 600
|
||||
|
||||
# Query execution timeout in seconds (for long-running queries); 300 = 5 minutes
|
||||
# Set to 0 for no timeout
|
||||
query_timeout = 300
|
||||
|
||||
# Login timeout in seconds (for SSO browser auth); 120 = 2 minutes
|
||||
login_timeout = 120
|
||||
|
||||
[cache]
|
||||
# Enable result caching
|
||||
enabled = true
|
||||
|
||||
# Cache directory (either relative to the project root or an absolute path)
|
||||
# Defaults to data/cache/ if not specified
|
||||
directory = "data/cache"
|
||||
|
||||
# Time-to-live for cached results, in seconds
|
||||
# 24 hours for historical data (86400 seconds)
|
||||
ttl_seconds = 86400
|
||||
|
||||
# Shorter TTL, in seconds, for data that includes today's date (3600 seconds = 1 hour)
|
||||
ttl_current_data_seconds = 3600
|
||||
|
||||
# Maximum cache size in MB (oldest entries are removed when it is exceeded)
|
||||
max_size_mb = 500
|
||||
|
||||
[databases]
|
||||
# Quick reference for database purposes (documentation only: every entry below is commented out, so this table is empty)
|
||||
# DATA_HUB = "Analyst-curated data warehouse - primary source for most queries"
|
||||
# PRIMARY_CARE = "Raw extracts from EMIS and TPP clinical systems"
|
||||
# NATIONAL = "NHS England national datasets (SUS, ECDS, MHSDS, etc.)"
|
||||
# FACTS_AND_DIMENSIONS_ALL_DATA = "External reference data (BNF, SNOMED, QOF clusters)"
|
||||
# REPORTING_DATASETS_ICB = "Reporting outputs and analyst workspaces"
|
||||
|
||||
# Tables commonly used for high-cost drug analysis
|
||||
[tables.activity]
|
||||
# Main activity data source (high-cost drug interventions): DATA_HUB.CDM.Acute__Conmon__PatientLevelDrugs
|
||||
# The table contains patient-level high-cost drug data; each key_columns entry below is annotated with its meaning
|
||||
database = "DATA_HUB"
|
||||
schema = "CDM"
|
||||
table = "Acute__Conmon__PatientLevelDrugs"
|
||||
key_columns = [
|
||||
"PseudoNHSNoLinked", # Pseudonymised NHS number, used for patient linking
|
||||
"ProviderCode", # NHS provider code (e.g., RM1, RGP)
|
||||
"LocalPatientID", # Local patient identifier within the provider
|
||||
"InterventionDate", # Date of the drug intervention
|
||||
"DrugName", # Drug name (raw; needs standardization before use)
|
||||
"DrugSNOMEDCode", # SNOMED code for the drug
|
||||
"PriceActual", # Actual cost of the intervention
|
||||
"TreatmentFunctionCode", # NHS treatment function code
|
||||
"TreatmentFunctionDesc", # Treatment function description
|
||||
"AdditionalDetail1", # Additional details (used for directory identification)
|
||||
]
|
||||
|
||||
[tables.patient]
|
||||
# Patient demographics (DATA_HUB.DWH.DimPerson); note this entry names the object with `view`, whereas [tables.activity] uses `table`
|
||||
database = "DATA_HUB"
|
||||
schema = "DWH"
|
||||
view = "DimPerson"
|
||||
key_columns = ["PatientPseudonym", "PersonKey", "CurrentGeneralPractice"]
|
||||
|
||||
[tables.medication]
|
||||
# Medication reference data (DATA_HUB.DWH.DimMedicineAndDevice, named via the `view` key)
|
||||
database = "DATA_HUB"
|
||||
schema = "DWH"
|
||||
view = "DimMedicineAndDevice"
|
||||
key_columns = ["ProductSnomedCode", "TherapeuticMoietySnomedCode", "ProductDescription"]
|
||||
|
||||
[tables.organization]
|
||||
# NHS organizations and GP practices (DATA_HUB.DWH.DimOrganisationAndSite, named via the `view` key)
|
||||
database = "DATA_HUB"
|
||||
schema = "DWH"
|
||||
view = "DimOrganisationAndSite"
|
||||
key_columns = ["SiteCode", "OrganisationName"]
|
||||
|
||||
[query]
|
||||
# Default query behaviors
|
||||
# Always double-quote identifiers so that they are treated case-sensitively
|
||||
quote_identifiers = true
|
||||
|
||||
# Default row limit applied to test queries
|
||||
test_limit = 20
|
||||
|
||||
# Maximum rows to fetch in a single query (prevents runaway queries); 100,000 rows
|
||||
max_rows = 100000
|
||||
|
||||
# Chunk size for large result sets (presumably rows per chunk; confirm against the loader)
|
||||
chunk_size = 10000
|
||||
Reference in New Issue
Block a user