feat: complete Task 2.2 - test refresh pipeline with Snowflake data
Tested full refresh pipeline end-to-end with real Snowflake data:

- Fixed trust filter to read Name column from defaultTrusts.csv
- Fixed Decimal type handling in calculate_cost_per_patient_per_annum
- Fixed array handling in convert_to_records for average_administered
- Added required reference CSV files to data/ directory
- Configured Snowflake connection (account, warehouse, user)

Results:

- Snowflake fetch: 656,695 records in ~7s
- Transformations: 519,848 records after UPID/drug/directory
- Pathway nodes: 293 for all_6mo (8 trusts, 14 directories)
- Total processing time: ~6.2 minutes
This commit is contained in:
@@ -10,11 +10,11 @@
|
||||
[connection]
|
||||
# Snowflake account identifier (e.g., "xy12345.uk-south.azure")
|
||||
# Ask your Snowflake administrator for the correct account name
|
||||
account = ""
|
||||
account = "ZK91403.uk-south.azure"
|
||||
|
||||
# Default warehouse to use for queries
|
||||
# Common options: ANALYST_WH, COMPUTE_WH
|
||||
warehouse = "ANALYST_WH"
|
||||
warehouse = "WH__XSMALL"
|
||||
|
||||
# Default database for queries
|
||||
# DATA_HUB is the primary analyst-curated data warehouse
|
||||
@@ -30,7 +30,7 @@ authenticator = "externalbrowser"
|
||||
|
||||
# User principal (email address for externalbrowser auth)
|
||||
# Leave empty to use the current Windows user or to be prompted
|
||||
user = ""
|
||||
user = "ANDREW.CHARLWOOD@NHS.NET"
|
||||
|
||||
# Role to use (optional, uses default role if empty)
|
||||
role = ""
|
||||
|
||||
Reference in New Issue
Block a user