commit 647d1bfa7fc134f1714920a2217301f5e636969a Author: Andrew Charlwood Date: Tue May 12 16:40:03 2026 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2916272 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +*.csv +*.xlsx +*.xls +*.parquet +*.html +*.png +*.jpg +*.jpeg +*.log +*.tmp +.tmp/ +.vscode/ +.claude/ +.openpackage/ + diff --git a/00_copied_reference/README.md b/00_copied_reference/README.md new file mode 100644 index 0000000..fdda051 --- /dev/null +++ b/00_copied_reference/README.md @@ -0,0 +1,15 @@ +# Copied Reference Queries + +These files were copied from the working `Snowflake-Queries` repo because they were already explicitly template-like. + +Use these as reference examples, not as the first place to start a new query. Several contain historic assumptions such as Norfolk and Waveney geography, hardcoded SNOMED codes, or notes that were current when the original work was done. + +For new work, start from the cleaned templates in the numbered folders at the repo root. + +## Contents + +- `medicine_lookup_checks/`: original short checks for prescribing or dispensing by VTM or VMP. +- `pqs_long_format/`: original PQS long-format and rolling-period templates. +- `useful_short_queries/`: original compact lookup, freshness, price/unit, and practice-list queries. + +The original `CheckPrescribingByPseudo.sql` file was deliberately not copied because it contains a hardcoded patient pseudonym. 
diff --git a/00_copied_reference/medicine_lookup_checks/CheckDispeningByVMP.sql b/00_copied_reference/medicine_lookup_checks/CheckDispeningByVMP.sql
new file mode 100644
index 0000000..58ecf22
--- /dev/null
+++ b/00_copied_reference/medicine_lookup_checks/CheckDispeningByVMP.sql
@@ -0,0 +1,27 @@
+-- Snowflake version: Check dispensing by VMP (Virtual Medicinal Product)
+-- Uses GPMeds dispensing data from NATIONAL.GPMED
+
+WITH SnomedCodes AS (
+    SELECT "ProductSnomedCode"
+    FROM DATA_HUB.DWH."DimMedicineAndDevice"
+    WHERE "MedicinalLatestSnomedCode" = '40326811000001109'  -- Specific VMP
+),
+LatestPeriod AS (
+    SELECT MAX("ProcessingPeriodDate") AS MaxPeriodDate
+    FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare"
+)
+SELECT
+    gpm."ProcessingPeriodDate",
+    dos."OrganisationName",
+    COUNT(DISTINCT gpm."PatientPseudonym") AS UniquePatientCount,
+    SUM(gpm."ItemCount") AS TotalItemsDispensed,
+    SUM(gpm."PaidQuantity") AS TotalQuantityDispensed
+FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" gpm
+INNER JOIN SnomedCodes sc ON gpm."PaiddmdCode" = sc."ProductSnomedCode"
+LEFT JOIN DATA_HUB.DWH."DimOrganisationAndSite" dos
+    ON gpm."CostCentreODSCode" = dos."SiteCode"
+CROSS JOIN LatestPeriod lp
+WHERE gpm."ProcessingPeriodDate" > DATEADD(MONTH, -12, lp.MaxPeriodDate)
+  AND gpm."ProcessingPeriodDate" <= lp.MaxPeriodDate
+GROUP BY gpm."ProcessingPeriodDate", dos."OrganisationName", gpm."CostCentreODSCode"
+ORDER BY gpm."ProcessingPeriodDate", dos."OrganisationName";
diff --git a/00_copied_reference/medicine_lookup_checks/CheckDispeningByVTM.sql b/00_copied_reference/medicine_lookup_checks/CheckDispeningByVTM.sql
new file mode 100644
index 0000000..cc73bfe
--- /dev/null
+++ b/00_copied_reference/medicine_lookup_checks/CheckDispeningByVTM.sql
@@ -0,0 +1,44 @@
+-- Snowflake version: Check dispensing by VTM (Virtual Therapeutic Moiety)
+-- Uses GPMeds dispensing data from NATIONAL.GPMED
+--
+-- Contains two independent statements. A WITH clause only applies to the
+-- statement it prefixes, so each statement declares the CTEs it needs.
+
+-- Query 1: sample of up to 100 raw dispensing rows from the final month of data
+WITH SnomedCodes AS (
+    SELECT "ProductSnomedCode", "ProductDescription"
+    FROM DATA_HUB.DWH."DimMedicineAndDevice"
+    WHERE "TherapeuticMoietySnomedCode" = '41145911000001106'  -- Tirzepatide VTM
+),
+LatestPeriod AS (
+    SELECT MAX("ProcessingPeriodDate") AS MaxPeriodDate
+    FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare"
+)
+SELECT *
+FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" gpm
+INNER JOIN SnomedCodes sc ON gpm."PaiddmdCode" = sc."ProductSnomedCode"
+CROSS JOIN LatestPeriod lp
+WHERE gpm."ProcessingPeriodDate" > DATEADD(MONTH, -1, lp.MaxPeriodDate)
+  AND gpm."ProcessingPeriodDate" <= lp.MaxPeriodDate
+limit 100;
+
+
+-- Query 2: unique patient count over the same one-month window
+-- (CTEs repeated here: the ones above went out of scope at the preceding semicolon)
+WITH SnomedCodes AS (
+    SELECT "ProductSnomedCode", "ProductDescription"
+    FROM DATA_HUB.DWH."DimMedicineAndDevice"
+    WHERE "TherapeuticMoietySnomedCode" = '41145911000001106'  -- Tirzepatide VTM
+),
+LatestPeriod AS (
+    SELECT MAX("ProcessingPeriodDate") AS MaxPeriodDate
+    FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare"
+)
+SELECT
+    COUNT(DISTINCT gpm."PatientPseudonym") AS UniquePatientCount,
+    MAX(gpm."ProcessingPeriodDate") AS LatestPeriod
+FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" gpm
+INNER JOIN SnomedCodes sc ON gpm."PaiddmdCode" = sc."ProductSnomedCode"
+CROSS JOIN LatestPeriod lp
+WHERE gpm."ProcessingPeriodDate" > DATEADD(MONTH, -1, lp.MaxPeriodDate)
+  AND gpm."ProcessingPeriodDate" <= lp.MaxPeriodDate;
diff --git a/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVMP.sql b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVMP.sql
new file mode 100644
index 0000000..c1f1ba8
--- /dev/null
+++ b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVMP.sql
@@ -0,0 +1,19 @@
+-- Snowflake version: Check prescribing by VMP (Virtual Medicinal Product)
+-- Uses unified PrimaryCareMedication table (combines EMIS + TPP)
+
+SET StartDate = '2025-04-01';
+SET EndDate = '2025-07-31';
+
+WITH SnomedCodes AS (
+    SELECT "ProductSnomedCode"
+    FROM DATA_HUB.DWH."DimMedicineAndDevice"
+    WHERE "MedicinalLatestSnomedCode" = '40326811000001109'  -- Specific VMP
+)
+SELECT DISTINCT
+    pcm."PatientPseudonym",
+    pcm."DateMedicationStart" AS EffectiveDate,
+    CAST(pcm."Quantity" AS VARCHAR(20)) AS Quantity
+FROM DATA_HUB.PHM."PrimaryCareMedication" pcm
+INNER JOIN SnomedCodes sc ON pcm."SNOMEDCode" = sc."ProductSnomedCode"
+WHERE pcm."DateMedicationStart" BETWEEN $StartDate AND $EndDate
+  AND
pcm."PatientPseudonym" IS NOT NULL; diff --git a/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM.sql b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM.sql new file mode 100644 index 0000000..c3697cc --- /dev/null +++ b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM.sql @@ -0,0 +1,30 @@ +-- Snowflake version: Check prescribing by VTM (Virtual Therapeutic Moiety) +-- Uses unified PrimaryCareMedication table (combines EMIS + TPP) + +SET StartDate = '2025-11-01'; +SET EndDate = '2025-12-31'; + +WITH SnomedCodes AS ( + SELECT "ProductSnomedCode", "ProductDescription" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "TherapeuticMoietySnomedCode" = '41145911000001106' -- Tirzepatide VTM +) +SELECT DISTINCT + dos."OrganisationName", + dp."PersonKey", + pcm."DateMedicationStart" AS EffectiveDate, + sc."ProductDescription", + CAST(pcm."Quantity" AS VARCHAR(20)) AS Quantity, + pcm."QuantityText" AS Dosage +FROM DATA_HUB.PHM."PrimaryCareMedication" pcm +INNER JOIN DATA_HUB.DWH."DimPerson" dp + ON pcm."PatientPseudonym" = dp."PatientPseudonym" +INNER JOIN SnomedCodes sc + ON pcm."SNOMEDCode" = sc."ProductSnomedCode" +INNER JOIN DATA_HUB.DWH."DimOrganisationAndSite" dos + ON dp."CurrentGeneralPractice" = dos."SiteCode" +WHERE pcm."DateMedicationStart" BETWEEN $StartDate AND $EndDate + AND pcm."PatientPseudonym" IS NOT NULL + AND dos."OrganisationSubType" = 'GP Practice' + AND dos."IsSiteNorfolkAndWaveney" = 'Yes' + AND dos."IsSiteActive" = 'Yes'; diff --git a/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTMMonthlyCount.sql b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTMMonthlyCount.sql new file mode 100644 index 0000000..43d366f --- /dev/null +++ b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTMMonthlyCount.sql @@ -0,0 +1,28 @@ +-- Snowflake version: Check prescribing by VTM (Virtual Therapeutic Moiety) +-- Uses unified PrimaryCareMedication table (combines EMIS + TPP) + 
+SET StartDate = '2025-05-01'; +SET EndDate = '2025-11-30'; + +WITH SnomedCodes AS ( + SELECT "ProductSnomedCode", "ProductDescription" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "TherapeuticMoietySnomedCode" = '41145911000001106' -- Tirzepatide VTM +) +SELECT + DATE_TRUNC('MONTH', pcm."DateMedicationStart") AS PrescribingMonth, + COUNT(DISTINCT dp."PersonKey") AS UniquePatientCount +FROM DATA_HUB.PHM."PrimaryCareMedication" pcm +INNER JOIN DATA_HUB.DWH."DimPerson" dp + ON pcm."PatientPseudonym" = dp."PatientPseudonym" +INNER JOIN SnomedCodes sc + ON pcm."SNOMEDCode" = sc."ProductSnomedCode" +INNER JOIN DATA_HUB.DWH."DimOrganisationAndSite" dos + ON dp."CurrentGeneralPractice" = dos."SiteCode" +WHERE pcm."DateMedicationStart" BETWEEN $StartDate AND $EndDate + AND pcm."PatientPseudonym" IS NOT NULL + AND dos."OrganisationSubType" = 'GP Practice' + AND dos."IsSiteNorfolkAndWaveney" = 'Yes' + AND dos."IsSiteActive" = 'Yes' +GROUP BY DATE_TRUNC('MONTH', pcm."DateMedicationStart") +ORDER BY PrescribingMonth diff --git a/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM_3Months.sql b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM_3Months.sql new file mode 100644 index 0000000..56e0784 --- /dev/null +++ b/00_copied_reference/medicine_lookup_checks/CheckPrescribingByVTM_3Months.sql @@ -0,0 +1,34 @@ +-- Snowflake version: Check prescribing by VTM for last 3 months +-- Uses unified PrimaryCareMedication table (combines EMIS + TPP) +-- Dynamically calculates date range from latest data + +WITH LatestDate AS ( + SELECT DATEADD(DAY, 1, MAX("DateMedicationStart")::DATE) AS EndDate + FROM DATA_HUB.PHM."PrimaryCareMedication" + WHERE "DateMedicationStart" >= DATEADD(MONTH, -6, CURRENT_DATE()) +), +DateRange AS ( + SELECT + EndDate, + DATEADD(MONTH, -3, EndDate) AS StartDate + FROM LatestDate +), +SnomedCodes AS ( + SELECT "ProductSnomedCode", "ProductDescription" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE
"TherapeuticMoietySnomedCode" = '775477008' -- Tirzepatide VTM. NOTE(review): the other tirzepatide VTM checks in this folder filter on '41145911000001106' - confirm which code DimMedicineAndDevice currently holds before relying on this count +), +AllPatients AS ( + SELECT DISTINCT pcm."PatientPseudonym" + FROM DATA_HUB.PHM."PrimaryCareMedication" pcm + INNER JOIN SnomedCodes sc ON pcm."SNOMEDCode" = sc."ProductSnomedCode" + CROSS JOIN DateRange dr + WHERE pcm."DateMedicationStart" > dr.StartDate + AND pcm."DateMedicationStart" <= dr.EndDate + AND pcm."PatientPseudonym" IS NOT NULL +) +SELECT + COUNT(DISTINCT "PatientPseudonym") AS UniquePatientCount, + (SELECT StartDate FROM DateRange) AS StartDate, + (SELECT EndDate FROM DateRange) AS EndDate +FROM AllPatients; diff --git a/00_copied_reference/pqs_long_format/_period_generator.sql b/00_copied_reference/pqs_long_format/_period_generator.sql new file mode 100644 index 0000000..c795ee3 --- /dev/null +++ b/00_copied_reference/pqs_long_format/_period_generator.sql @@ -0,0 +1,90 @@ +-- ============================================================================== +-- PERIOD GENERATOR - Phase 7 Long Format Output +-- ============================================================================== +-- This file contains the reusable recursive CTE pattern for generating +-- dynamic rolling periods from June 2025 to the current month. 
+-- +-- Requirements: +-- - Earliest period: ends 2025-06-30 (hardcoded) +-- - Latest period: ends at last day of current month (dynamic) +-- - One period per month from June 2025 to current month +-- - 12-month lookback for M1, M2, M3, M5, M6, M7, M8 +-- - 6-month lookback for M4 (Female UTI) +-- +-- Example output (run 2026-01-29): +-- 8 periods: Jun 25, Jul 25, Aug 25, Sep 25, Oct 25, Nov 25, Dec 25, Jan 26 +-- ============================================================================== + +-- ============================================================================== +-- RECURSIVE CTE PATTERN (Copy this into each measure query) +-- ============================================================================== + +WITH RECURSIVE date_periods AS ( + -- Anchor: first period ends June 2025 + SELECT DATE '2025-06-30' AS period_end_date + UNION ALL + -- Recurse: add one month until we reach current month + SELECT LAST_DAY(DATEADD(MONTH, 1, period_end_date)) + FROM date_periods + WHERE period_end_date < LAST_DAY(CURRENT_DATE()) +), + +-- Calculate start dates for both 12-month and 6-month lookbacks +rolling_periods AS ( + SELECT + CAST(DATEADD(MONTH, -11, DATE_TRUNC('MONTH', period_end_date)) AS DATE) AS "PeriodStartDate_12m", + CAST(DATEADD(MONTH, -5, DATE_TRUNC('MONTH', period_end_date)) AS DATE) AS "PeriodStartDate_6m", + CAST(period_end_date AS DATE) AS "PeriodEndDate" + FROM date_periods +) + +SELECT * FROM rolling_periods ORDER BY "PeriodEndDate"; + +-- ============================================================================== +-- EXPECTED OUTPUT (as of January 2026) +-- ============================================================================== +-- PeriodEndDate PeriodStartDate_12m PeriodStartDate_6m +-- 2025-06-30 2024-07-01 2025-01-01 +-- 2025-07-31 2024-08-01 2025-02-01 +-- 2025-08-31 2024-09-01 2025-03-01 +-- 2025-09-30 2024-10-01 2025-04-01 +-- 2025-10-31 2024-11-01 2025-05-01 +-- 2025-11-30 2024-12-01 2025-06-01 +-- 2025-12-31 2025-01-01 
2025-07-01 +-- 2026-01-31 2025-02-01 2025-08-01 +-- +-- 8 periods total (June 2025 through January 2026) +-- ============================================================================== + +-- ============================================================================== +-- USAGE NOTES +-- ============================================================================== +-- 1. For 12-month measures (M1, M2, M3, M5, M6, M7, M8): +-- Filter data WHERE date_column BETWEEN "PeriodStartDate_12m" AND "PeriodEndDate" +-- +-- 2. For 6-month measure (M4): +-- Filter data WHERE date_column BETWEEN "PeriodStartDate_6m" AND "PeriodEndDate" +-- +-- 3. The recursive CTE is dynamic: +-- - Running in January 2026 → 8 periods +-- - Running in February 2026 → 9 periods +-- - Running in June 2026 → 13 periods +-- +-- 4. This pattern should be included at the START of each measure query +-- before the main data CTEs. +-- ============================================================================== + +-- ============================================================================== +-- DATA AVAILABILITY NOTES +-- ============================================================================== +-- DISPENSING (NATIONAL.GPMED.MedicinesDispensedInPrimarycare): +-- Max date: ~2025-07 (updates monthly) +-- Min date: 2018-04 +-- Coverage: 7+ years of history, so every lookback window is covered at its start; periods ending after the max date are only partially populated until dispensing data catches up +-- +-- PRESCRIBING (REPORTING_DATASETS_ICB.SCRATCHPAD.MEDS__UnifiedPrescribingTable): +-- Max date: ~2026-01 (updates frequently) +-- Min date: 2024-01 +-- Coverage: ~2 years — earlier periods may have partial/no data +-- Note: Periods with PeriodStartDate before 2024-01 will show partial prescribing data (none if the period also ends before 2024-01) +-- ============================================================================== diff --git a/00_copied_reference/pqs_long_format/_rolling_periods_template.sql b/00_copied_reference/pqs_long_format/_rolling_periods_template.sql new file mode 100644 index 0000000..9ffdc29 --- /dev/null +++ 
b/00_copied_reference/pqs_long_format/_rolling_periods_template.sql @@ -0,0 +1,203 @@ +-- ============================================================================== +-- ROLLING PERIODS TEMPLATE - Phase 7 Long Format Output +-- ============================================================================== +-- This file documents the reusable CTE patterns for generating rolling time +-- periods for both dispensing and prescribing data sources. +-- +-- Key considerations: +-- - Dispensing data: 2018-04 to 2025-07 (88 months) - full historical coverage +-- - Prescribing data: 2024-01 to 2026-01 (25 months) - LIMITED HISTORY +-- (prescribing period 2 will have partial/no data) +-- +-- IMPORTANT: Each data source has its own MAX date, so periods must be +-- calculated separately within each UNION ALL branch. +-- ============================================================================== + +-- ============================================================================== +-- PATTERN 1: 12-MONTH ROLLING PERIODS (for M1, M2, M3, M5, M6, M7, M8) +-- ============================================================================== +-- Generates 3 rolling periods covering approximately 3 years: +-- Period 0: Most recent 12 months +-- Period 1: 12-24 months ago +-- Period 2: 24-36 months ago + +/* +-- DISPENSING DATA version: +WITH max_dates_disp AS ( + SELECT MAX("ProcessingPeriodDate") AS max_date + FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" +), +period_offsets AS ( + SELECT 0 AS period_offset UNION ALL + SELECT 1 UNION ALL + SELECT 2 +), +rolling_periods_12m AS ( + SELECT + period_offset, + CAST(DATEADD(MONTH, -11, DATE_TRUNC('MONTH', DATEADD(MONTH, -12 * period_offset, max_date))) AS DATE) AS period_start_date, + CAST(LAST_DAY(DATEADD(MONTH, -12 * period_offset, max_date)) AS DATE) AS period_end_date + FROM period_offsets, max_dates_disp +) +-- Then filter data WHERE "ProcessingPeriodDate" BETWEEN period_start_date AND period_end_date + +-- Example output (as 
of July 2025): +-- Period 0: 2024-08-01 to 2025-07-31 +-- Period 1: 2023-08-01 to 2024-07-31 +-- Period 2: 2022-08-01 to 2023-07-31 +*/ + +-- ============================================================================== +-- PATTERN 2: 6-MONTH ROLLING PERIODS (for M4 - Female UTI) +-- ============================================================================== +-- Generates 6 rolling periods covering approximately 3 years: +-- Period 0: Most recent 6 months +-- Period 1: 6-12 months ago +-- Period 2: 12-18 months ago +-- ...etc + +/* +WITH max_dates_disp AS ( + SELECT MAX("ProcessingPeriodDate") AS max_date + FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" +), +period_offsets AS ( + SELECT 0 AS period_offset UNION ALL + SELECT 1 UNION ALL + SELECT 2 UNION ALL + SELECT 3 UNION ALL + SELECT 4 UNION ALL + SELECT 5 +), +rolling_periods_6m AS ( + SELECT + period_offset, + CAST(DATEADD(MONTH, -5, DATE_TRUNC('MONTH', DATEADD(MONTH, -6 * period_offset, max_date))) AS DATE) AS period_start_date, + CAST(LAST_DAY(DATEADD(MONTH, -6 * period_offset, max_date)) AS DATE) AS period_end_date + FROM period_offsets, max_dates_disp +) +-- Then filter data WHERE "ProcessingPeriodDate" BETWEEN period_start_date AND period_end_date + +-- Example output (as of July 2025): +-- Period 0: 2025-02-01 to 2025-07-31 +-- Period 1: 2024-08-01 to 2025-01-31 +-- Period 2: 2024-02-01 to 2024-07-31 +-- Period 3: 2023-08-01 to 2024-01-31 +-- Period 4: 2023-02-01 to 2023-07-31 +-- Period 5: 2022-08-01 to 2023-01-31 +*/ + +-- ============================================================================== +-- COMBINED DISPENSING + PRESCRIBING TEMPLATE +-- ============================================================================== +-- Each data source calculates periods from its own MAX date, then combines +-- with UNION ALL. The DataSource column distinguishes them. 
+ +/* +WITH +-- ========================= +-- DISPENSING PERIODS +-- ========================= +max_dates_disp AS ( + SELECT MAX("ProcessingPeriodDate") AS max_date + FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" +), +period_offsets AS ( + SELECT 0 AS period_offset UNION ALL SELECT 1 UNION ALL SELECT 2 +), +disp_periods AS ( + SELECT + period_offset, + CAST(DATEADD(MONTH, -11, DATE_TRUNC('MONTH', DATEADD(MONTH, -12 * period_offset, max_date))) AS DATE) AS period_start_date, + CAST(LAST_DAY(DATEADD(MONTH, -12 * period_offset, max_date)) AS DATE) AS period_end_date + FROM period_offsets, max_dates_disp +), + +-- ========================= +-- PRESCRIBING PERIODS +-- ========================= +max_dates_pres AS ( + SELECT MAX("DateMedicationStart") AS max_date + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" + WHERE "DateMedicationStart" IS NOT NULL +), +pres_periods AS ( + SELECT + period_offset, + CAST(DATEADD(MONTH, -11, DATE_TRUNC('MONTH', DATEADD(MONTH, -12 * period_offset, max_date))) AS DATE) AS period_start_date, + CAST(LAST_DAY(DATEADD(MONTH, -12 * period_offset, max_date)) AS DATE) AS period_end_date + FROM period_offsets, max_dates_pres +), + +-- ========================= +-- DATA QUERIES (example structure) +-- ========================= +disp_data AS ( + SELECT + org."OrganisationName", + dp.period_start_date AS "PeriodStartDate", + dp.period_end_date AS "PeriodEndDate", + 'Dispensing' AS "DataSource", + -- ... aggregations + FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" meds + CROSS JOIN disp_periods dp + JOIN DATA_HUB.DWH."DimOrganisationAndSite" org ON ... + WHERE meds."ProcessingPeriodDate" BETWEEN dp.period_start_date AND dp.period_end_date + GROUP BY org."OrganisationName", dp.period_start_date, dp.period_end_date +), + +pres_data AS ( + SELECT + org."OrganisationName", + pp.period_start_date AS "PeriodStartDate", + pp.period_end_date AS "PeriodEndDate", + 'Prescribing' AS "DataSource", + -- ... 
aggregations + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + CROSS JOIN pres_periods pp + JOIN DATA_HUB.DWH."DimOrganisationAndSite" org ON ... + WHERE rx."DateMedicationStart" BETWEEN pp.period_start_date AND pp.period_end_date + GROUP BY org."OrganisationName", pp.period_start_date, pp.period_end_date +) + +-- ========================= +-- UNPIVOT TO LONG FORMAT +-- ========================= +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", "DataSource", + 'M1' AS "Measure", 'IndicatorA' AS "Indicator", CAST("IndicatorA" AS FLOAT) AS "Value" +FROM disp_data +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", "DataSource", + 'M1' AS "Measure", 'IndicatorB' AS "Indicator", CAST("IndicatorB" AS FLOAT) AS "Value" +FROM disp_data +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", "DataSource", + 'M1' AS "Measure", 'IndicatorA' AS "Indicator", CAST("IndicatorA" AS FLOAT) AS "Value" +FROM pres_data +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", "DataSource", + 'M1' AS "Measure", 'IndicatorB' AS "Indicator", CAST("IndicatorB" AS FLOAT) AS "Value" +FROM pres_data +*/ + +-- ============================================================================== +-- DATE RANGE NOTES +-- ============================================================================== +-- As of January 2026: +-- +-- DISPENSING (NATIONAL.GPMED.MedicinesDispensedInPrimarycare): +-- Max date: 2025-07-01 +-- Min date: 2018-04-01 +-- Coverage: 88 months (7+ years) +-- All 3 rolling periods have full data +-- +-- PRESCRIBING (REPORTING_DATASETS_ICB.SCRATCHPAD.MEDS__UnifiedPrescribingTable): +-- Max date: 2026-01-06 +-- Min date: 2024-01-06 +-- Coverage: 25 months (~2 years) +-- Period 0: Full data +-- Period 1: Full data +-- Period 2: PARTIAL OR NO DATA (starts 2023-02, but data begins 2024-01) +-- +-- This asymmetry is expected - prescribing data is newer than dispensing. 
+-- ============================================================================== diff --git a/00_copied_reference/pqs_long_format/_unified_template.sql b/00_copied_reference/pqs_long_format/_unified_template.sql new file mode 100644 index 0000000..e536c20 --- /dev/null +++ b/00_copied_reference/pqs_long_format/_unified_template.sql @@ -0,0 +1,255 @@ +-- ============================================================================== +-- UNIFIED SQL TEMPLATE - Phase 7 Long Format Output +-- ============================================================================== +-- This template shows the complete pattern for converting measures to long format +-- with both dispensing and prescribing data sources and rolling time periods. +-- +-- Output columns (7 standard columns): +-- OrganisationName - GP Practice name +-- PeriodStartDate - Start of the time window (DATE type) +-- PeriodEndDate - End of the time window (DATE type) +-- DataSource - 'Dispensing' or 'Prescribing' +-- Measure - Measure identifier (e.g., 'M2') +-- Indicator - Metric name (e.g., 'TotalPrescriptions') +-- Value - The metric value (FLOAT type) +-- +-- Key patterns: +-- 1. Recursive CTE generates all periods from June 2025 to current month +-- 2. Cross join practices with periods for all combinations +-- 3. Left join dispensing data filtered by date range per period +-- 4. Inner join prescribing data filtered by date range per period +-- 5. UNION ALL to unpivot wide indicators into long format +-- 6. 
UNION ALL to combine dispensing and prescribing data sources
+-- ==============================================================================
+
+-- ==============================================================================
+-- SECTION 1: RECURSIVE PERIOD GENERATOR
+-- Copy this section to all measure queries unchanged
+-- ==============================================================================
+
+WITH RECURSIVE date_periods AS (
+    -- Anchor: first period ends June 2025
+    SELECT DATE '2025-06-30' AS period_end_date
+    UNION ALL
+    -- Recurse: add one month until we reach current month
+    SELECT LAST_DAY(DATEADD(MONTH, 1, period_end_date))
+    FROM date_periods
+    WHERE period_end_date < LAST_DAY(CURRENT_DATE())
+),
+rolling_periods AS (
+    SELECT
+        -- For 12-month measures (M1, M2, M3, M5, M6, M7, M8):
+        CAST(DATEADD(MONTH, -11, DATE_TRUNC('MONTH', period_end_date)) AS DATE) AS "PeriodStartDate",
+        -- For 6-month measures (M4), use this instead:
+        -- CAST(DATEADD(MONTH, -5, DATE_TRUNC('MONTH', period_end_date)) AS DATE) AS "PeriodStartDate",
+        CAST(period_end_date AS DATE) AS "PeriodEndDate"
+    FROM date_periods
+),
+
+-- ==============================================================================
+-- SECTION 2: COMMON REFERENCE TABLES
+-- Adapt these CTEs based on measure requirements
+-- ==============================================================================
+
+practices AS (
+    -- Norfolk & Waveney GP practices
+    SELECT DISTINCT "OrganisationCode", "OrganisationName"
+    FROM DATA_HUB.DWH."DimOrganisationAndSite"
+    WHERE "IsSiteNorfolkAndWaveney" = 'Yes'
+      AND "IsSiteActive" = 'Yes'
+      AND "OrganisationSubType" = 'GP Practice'
+),
+practice_periods AS (
+    -- Cross join: all practices × all periods
+    SELECT
+        p."OrganisationCode",
+        p."OrganisationName",
+        rp."PeriodStartDate",
+        rp."PeriodEndDate"
+    FROM practices p
+    CROSS JOIN rolling_periods rp
+),
+ooh_providers AS (
+    -- Out of Hours provider organisations (for prescribing data)
+    SELECT DISTINCT "OrganisationCode"
+    FROM DATA_HUB.DWH."DimOrganisationAndSite"
+    WHERE "IsSiteActive" = 'Yes'
+      AND ("OrganisationName" LIKE '%INTEGRATED CARE%'
+           OR "OrganisationName" LIKE '%IC24%'
+           OR "OrganisationCode" = 'Y02751')
+),
+patients AS (
+    -- All patients with a registered GP (adapt age/gender filters per measure)
+    SELECT p."PersonKey", p."PatientPseudonym", p."PHMGeneralPractice" AS "GP"
+    FROM DATA_HUB.DWH."DimPerson" p
+    WHERE p."PHMGeneralPractice" IS NOT NULL
+      AND p."PHMGeneralPractice" <> '*'
+),
+
+-- ==============================================================================
+-- SECTION 3: DISPENSING DATA AGGREGATION
+-- Adapt the SELECT, JOIN, WHERE, and GROUP BY for each measure
+-- ==============================================================================
+
+dispensing_agg AS (
+    SELECT
+        pp."OrganisationName",
+        pp."PeriodStartDate",
+        pp."PeriodEndDate",
+        -- Measure-specific aggregations (example: M2 duration)
+        COUNT(DISTINCT meds."PatientPseudonym") AS "UniquePatients",
+        COUNT(meds."CostCentreODSCode") AS "TotalPrescriptions",  -- join key is non-null only on matched rows; COUNT(*) would also count the NULL-extended row of a practice/period with no dispensing
+        SUM(
+            CASE
+                WHEN (LEFT(meds."PaidBNFCode", 9) IN ('0501030I0', '0501030Z0') AND meds."PaidQuantity" > 6) THEN 1
+                WHEN (LEFT(meds."PaidBNFCode", 9) = '0501013B0' AND meds."PaidQuantity" > 15) THEN 1
+                ELSE 0
+            END
+        ) AS "PrescriptionsMoreThan5Days"
+    FROM practice_periods pp
+    LEFT JOIN NATIONAL.GPMED."MedicinesDispensedInPrimarycare" meds
+        ON pp."OrganisationCode" = meds."CostCentreODSCode"
+        AND meds."ProcessingPeriodDate" BETWEEN pp."PeriodStartDate" AND pp."PeriodEndDate"
+        -- Measure-specific BNF code filter
+        AND LEFT(meds."PaidBNFCode", 9) IN ('0501030I0', '0501030Z0', '0501013B0')
+    LEFT JOIN DATA_HUB.DWH."DimPerson" dp
+        ON dp."PatientPseudonym" = meds."PatientPseudonym"
+    -- Y02751 rule: keeps Y02751 rows only where the patient is registered at Y02751. NOTE(review): the stated intent is to "exclude OOH provider's own registered patients" (as the prescribing branch does) - confirm this predicate is not inverted
+    WHERE (pp."OrganisationCode" <> 'Y02751' OR dp."PHMGeneralPractice" = 'Y02751')
+    GROUP BY pp."OrganisationName", pp."PeriodStartDate", pp."PeriodEndDate"
+    -- Only include practices with data (COUNT(*) is never 0 after the LEFT JOIN, so count the meds join key instead)
+    HAVING COUNT(meds."CostCentreODSCode") > 0
+),
+dispensing_with_pct AS (
+    -- Calculate derived metrics (percentages, ratios)
+    SELECT
+        "OrganisationName",
+        "PeriodStartDate",
+        "PeriodEndDate",
+        "UniquePatients",
+        "TotalPrescriptions",
+        "PrescriptionsMoreThan5Days",
+        ROUND(100.0 * "PrescriptionsMoreThan5Days" / NULLIF("TotalPrescriptions", 0), 2) AS "PercentageMoreThan5Days"
+    FROM dispensing_agg
+),
+
+-- ==============================================================================
+-- SECTION 4: PRESCRIBING DATA AGGREGATION
+-- Similar structure to dispensing, but with SNOMED→BNF mapping
+-- ==============================================================================
+
+prescribing_agg AS (
+    SELECT
+        pp."OrganisationName",
+        pp."PeriodStartDate",
+        pp."PeriodEndDate",
+        -- Measure-specific aggregations
+        COUNT(DISTINCT patients."PatientPseudonym") AS "UniquePatients",
+        COUNT(*) AS "TotalPrescriptions",
+        SUM(
+            CASE
+                WHEN (LEFT(med."BNFCode", 9) IN ('0501030I0', '0501030Z0') AND rx."Quantity" > 6) THEN 1
+                WHEN (LEFT(med."BNFCode", 9) = '0501013B0' AND rx."Quantity" > 15) THEN 1
+                ELSE 0
+            END
+        ) AS "PrescriptionsMoreThan5Days"
+    FROM practice_periods pp
+    INNER JOIN patients ON patients."GP" = pp."OrganisationCode"
+    INNER JOIN REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx
+        ON rx."PersonKey" = patients."PersonKey"
+        AND rx."DateMedicationStart" BETWEEN pp."PeriodStartDate" AND pp."PeriodEndDate"
+    -- SNOMED→BNF mapping via DimMedicineAndDevice
+    INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" med
+        ON rx."SNOMEDCode" = med."ProductSnomedCode"
+        AND med."BNFCode" IS NOT NULL
+        -- Measure-specific BNF code filter
+        AND LEFT(med."BNFCode", 9) IN ('0501030I0', '0501030Z0', '0501013B0')
+    -- OOH provider filter
+    INNER JOIN ooh_providers ooh ON ooh."OrganisationCode" = rx."OrgCode"
+    -- Exclude OOH provider's own registered patients
+    WHERE NOT (rx."OrgCode" = 'Y02751' AND rx."CurrentGeneralPractice" = 'Y02751')
+    GROUP BY
pp."OrganisationName", pp."PeriodStartDate", pp."PeriodEndDate" + HAVING COUNT(*) > 0 +), +prescribing_with_pct AS ( + SELECT + "OrganisationName", + "PeriodStartDate", + "PeriodEndDate", + "UniquePatients", + "TotalPrescriptions", + "PrescriptionsMoreThan5Days", + ROUND(100.0 * "PrescriptionsMoreThan5Days" / NULLIF("TotalPrescriptions", 0), 2) AS "PercentageMoreThan5Days" + FROM prescribing_agg +) + +-- ============================================================================== +-- SECTION 5: LONG FORMAT OUTPUT +-- Use UNION ALL to: +-- 1. Unpivot each indicator into separate rows +-- 2. Combine dispensing and prescribing data sources +-- ============================================================================== + +-- DISPENSING indicators +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Dispensing' AS "DataSource", 'M2' AS "Measure", + 'UniquePatients' AS "Indicator", + CAST("UniquePatients" AS FLOAT) AS "Value" +FROM dispensing_with_pct +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Dispensing' AS "DataSource", 'M2' AS "Measure", + 'TotalPrescriptions' AS "Indicator", + CAST("TotalPrescriptions" AS FLOAT) AS "Value" +FROM dispensing_with_pct +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Dispensing' AS "DataSource", 'M2' AS "Measure", + 'PrescriptionsMoreThan5Days' AS "Indicator", + CAST("PrescriptionsMoreThan5Days" AS FLOAT) AS "Value" +FROM dispensing_with_pct +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Dispensing' AS "DataSource", 'M2' AS "Measure", + 'PercentageMoreThan5Days' AS "Indicator", + CAST("PercentageMoreThan5Days" AS FLOAT) AS "Value" +FROM dispensing_with_pct +UNION ALL +-- PRESCRIBING indicators +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Prescribing' AS "DataSource", 'M2' AS "Measure", + 'UniquePatients' AS "Indicator", + CAST("UniquePatients" AS FLOAT) AS "Value" +FROM prescribing_with_pct +UNION ALL 
+SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Prescribing' AS "DataSource", 'M2' AS "Measure", + 'TotalPrescriptions' AS "Indicator", + CAST("TotalPrescriptions" AS FLOAT) AS "Value" +FROM prescribing_with_pct +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Prescribing' AS "DataSource", 'M2' AS "Measure", + 'PrescriptionsMoreThan5Days' AS "Indicator", + CAST("PrescriptionsMoreThan5Days" AS FLOAT) AS "Value" +FROM prescribing_with_pct +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", + 'Prescribing' AS "DataSource", 'M2' AS "Measure", + 'PercentageMoreThan5Days' AS "Indicator", + CAST("PercentageMoreThan5Days" AS FLOAT) AS "Value" +FROM prescribing_with_pct + +ORDER BY "PeriodEndDate", "DataSource", "OrganisationName", "Indicator"; + +-- ============================================================================== +-- EXPECTED OUTPUT STRUCTURE +-- ============================================================================== +-- For M2 with 4 indicators, 2 data sources, 8 periods, ~100 practices: +-- Rows ≈ 4 indicators × 2 sources × 8 periods × 100 practices = 6,400 rows +-- +-- Sample row: +-- | OrganisationName | PeriodStartDate | PeriodEndDate | DataSource | Measure | Indicator | Value | +-- |------------------------|-----------------|---------------|-------------|---------|------------------------|--------| +-- | Acle Medical Partnership | 2024-07-01 | 2025-06-30 | Dispensing | M2 | PercentageMoreThan5Days| 57.04 | +-- ============================================================================== diff --git a/00_copied_reference/useful_short_queries/MEDS__ProductPriceAndUnitLookup.sql b/00_copied_reference/useful_short_queries/MEDS__ProductPriceAndUnitLookup.sql new file mode 100644 index 0000000..8d93c61 --- /dev/null +++ b/00_copied_reference/useful_short_queries/MEDS__ProductPriceAndUnitLookup.sql @@ -0,0 +1,90 @@ +-- 
============================================================================ +-- MEDS__ProductPriceAndUnitLookup +-- Medicine Reference Data: Price Per Unit and Pack Unit Descriptions +-- ============================================================================ +-- Derives PricePerUnit and PackUnitDescription for all dm+d product levels +-- For VMPP/AMPP: uses direct values from DimMedicineAndDevice +-- For VMP/AMP: derives from child VMPP/AMPP products +-- - Price: AVG of child prices +-- - Units: MODE (most common) of child units +-- Price priority: DrugTariffPricePerUnit > IndicativePricePerUnit > AnnualCost/AnnualQuantity +-- ============================================================================ + +WITH PackLevelData AS ( + -- Get prices and units for pack-level products (VMPP/AMPP) + SELECT + "ProductSnomedCode", + "ParentPresentationSnomedCode", + "ProductLevel", + "PackUnitDescription", + COALESCE( + "DrugTariffPricePerUnit", + "IndicativePricePerUnit", + CASE WHEN "AnnualQuantity" > 0 THEN "AnnualCost" / "AnnualQuantity" END + ) AS PricePerUnit + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "ProductLevel" IN ('VMPP', 'AMPP') +), + +ProductPrices AS ( + -- Direct prices for VMPP/AMPP + SELECT + "ProductSnomedCode", + PricePerUnit + FROM PackLevelData + WHERE PricePerUnit IS NOT NULL + + UNION ALL + + -- Aggregated prices for VMP/AMP from their children + SELECT + parent."ProductSnomedCode", + AVG(pld.PricePerUnit) AS PricePerUnit + FROM DATA_HUB.DWH."DimMedicineAndDevice" parent + JOIN PackLevelData pld ON pld."ParentPresentationSnomedCode" = parent."ProductSnomedCode" + WHERE parent."ProductLevel" IN ('VMP', 'AMP') + AND pld.PricePerUnit IS NOT NULL + GROUP BY parent."ProductSnomedCode" +), + +PackUnits AS ( + -- Direct units for VMPP/AMPP + SELECT + "ProductSnomedCode", + "PackUnitDescription" + FROM PackLevelData + WHERE "PackUnitDescription" IS NOT NULL + + UNION ALL + + -- Units for VMP/AMP: use most common unit from child VMPP/AMPP 
products + SELECT + parent."ProductSnomedCode", + MODE(pld."PackUnitDescription") AS "PackUnitDescription" + FROM DATA_HUB.DWH."DimMedicineAndDevice" parent + JOIN PackLevelData pld ON pld."ParentPresentationSnomedCode" = parent."ProductSnomedCode" + WHERE parent."ProductLevel" IN ('VMP', 'AMP') + AND pld."PackUnitDescription" IS NOT NULL + GROUP BY parent."ProductSnomedCode" +) + +-- ============================================================================ +-- Usage: Join these CTEs to prescribing data on ProductSnomedCode +-- ============================================================================ +-- Example: +-- SELECT +-- prescribing.*, +-- pp.PricePerUnit, +-- pu."PackUnitDescription", +-- ROUND(pp.PricePerUnit * prescribing."Quantity", 2) AS EstPrice +-- FROM [prescribing_data] prescribing +-- LEFT JOIN ProductPrices pp ON prescribing."SNOMEDCode" = pp."ProductSnomedCode" +-- LEFT JOIN PackUnits pu ON prescribing."SNOMEDCode" = pu."ProductSnomedCode" +-- ============================================================================ + +SELECT + pp."ProductSnomedCode", + pp.PricePerUnit, + pu."PackUnitDescription" +FROM ProductPrices pp +LEFT JOIN PackUnits pu ON pp."ProductSnomedCode" = pu."ProductSnomedCode" diff --git a/00_copied_reference/useful_short_queries/MedicineList_Query.sql b/00_copied_reference/useful_short_queries/MedicineList_Query.sql new file mode 100644 index 0000000..c1a9fa5 --- /dev/null +++ b/00_copied_reference/useful_short_queries/MedicineList_Query.sql @@ -0,0 +1,22 @@ +-- ============================================================================= +-- Run BOTH queries and import into a sheet called "Lookup": +-- - Column A = Brand Name (from Query 1) +-- - Column C = Generic Name (from Query 2) +-- ============================================================================= + +-- QUERY 1: Brand Names -> paste into Column A of "Lookup" sheet +SELECT DISTINCT + mad."ProductDescription" AS "Brand Name" +FROM 
DATA_HUB.DWH."DimMedicineAndDevice" mad +WHERE mad."ProductDescription" IS NOT NULL +ORDER BY mad."ProductDescription"; + + +-- QUERY 2: Generic Names -> paste into Column C of "Lookup" sheet +SELECT DISTINCT + gen."ProductDescription" AS "Generic Name" +FROM DATA_HUB.DWH."DimMedicineAndDevice" mad +INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" gen + ON mad."MedicinalLatestSnomedCode" = gen."ProductSnomedCode" +WHERE gen."ProductDescription" IS NOT NULL +ORDER BY gen."ProductDescription"; diff --git a/00_copied_reference/useful_short_queries/latest_prescribing_event_date.sql b/00_copied_reference/useful_short_queries/latest_prescribing_event_date.sql new file mode 100644 index 0000000..beb004b --- /dev/null +++ b/00_copied_reference/useful_short_queries/latest_prescribing_event_date.sql @@ -0,0 +1,9 @@ +-- Latest Data Query for Snowflake +-- Returns the most recent prescribing system event date. +-- DateMedicationStart can include future-dated medication records, so this uses +-- DateEventRecorded as the data freshness marker. + +SELECT + MAX(CAST("DateEventRecorded" AS DATE)) AS "LatestEventRecordedDate" +FROM PRIMARY_CARE.TPP."SRPrimaryCareMedication" +WHERE "DateEventRecorded" >= DATEADD('MONTH', -3, CURRENT_DATE()); diff --git a/00_copied_reference/useful_short_queries/practice_list_norfolk_waveney.sql b/00_copied_reference/useful_short_queries/practice_list_norfolk_waveney.sql new file mode 100644 index 0000000..95c2494 --- /dev/null +++ b/00_copied_reference/useful_short_queries/practice_list_norfolk_waveney.sql @@ -0,0 +1,24 @@ +/* +================================================================================ +HF_07_PracticeList.sql +Distinct list of GP Practices in Norfolk & Waveney ICB +================================================================================ + +PURPOSE: +Reference list of GP practice codes and names for the heart failure analysis. 
+ +CREATED: 2026-01-13 +================================================================================ +*/ + +SELECT DISTINCT + org."SiteCode" AS "Practice Code", + org."OrganisationName" AS "Practice Name", + org."PlaceName" AS "Place", + org."PCNName" AS "PCN" +FROM DATA_HUB.DWH."DimOrganisationAndSite" org +WHERE org."IsSiteActive" = 'Yes' + AND org."SiteType" = 'Parent' + AND org."IsSiteNorfolkAndWaveney" = 'Yes' + AND org."OrganisationSubType" = 'GP Practice' +ORDER BY org."OrganisationName"; diff --git a/00_copied_reference/useful_short_queries/registered_population_by_practice.sql b/00_copied_reference/useful_short_queries/registered_population_by_practice.sql new file mode 100644 index 0000000..f8e5360 --- /dev/null +++ b/00_copied_reference/useful_short_queries/registered_population_by_practice.sql @@ -0,0 +1,12 @@ +-- REGISTERED POPULATION: Practice population counts for all Norfolk & Waveney GP practices +-- Returns OrganisationName and RegisteredPopulation for reporting and per-capita calculations + +SELECT DISTINCT + "OrganisationName", + "RegisteredPopulation" +FROM DATA_HUB.DWH."DimOrganisationAndSite" +WHERE "IsSiteNorfolkAndWaveney" = 'Yes' + AND "IsSiteActive" = 'Yes' + AND "OrganisationSubType" = 'GP Practice' + AND "OrganisationName" <> 'Vulnerable Adults Service' +ORDER BY "OrganisationName" diff --git a/01_medicine_lookups/dispensing_by_vtm_or_vmp.sql b/01_medicine_lookups/dispensing_by_vtm_or_vmp.sql new file mode 100644 index 0000000..cd3c6af --- /dev/null +++ b/01_medicine_lookups/dispensing_by_vtm_or_vmp.sql @@ -0,0 +1,78 @@ +/* + Dispensing by VTM or VMP + ======================== + + Purpose: + Summarise dispensed items, patients, and quantity for a medicine group + using national dispensing data. + + Data source: + NATIONAL.GPMED."MedicinesDispensedInPrimarycare" + + Notes: + - Dispensing data is official/payment-oriented and usually lags prescribing. + - Set either VTM_SNOMED_CODE or VMP_SNOMED_CODE. Leave the other as NULL. 
+ - ProcessingPeriodDate is a month-level period date. +*/ + +SET START_PERIOD = '2025-04-01'; +SET END_PERIOD = '2026-03-31'; +SET VTM_SNOMED_CODE = NULL; +SET VMP_SNOMED_CODE = 'REPLACE_WITH_VMP_SNOMED_CODE'; + +WITH products AS ( + -- Expands the selected VTM/VMP into product-level dm+d codes from the medicine dimension. + -- Only the join key and the reported moiety name are kept: if the dimension holds several + -- rows per product code (differing description/BNF), extra columns would multiply the sums below. + SELECT DISTINCT + "ProductSnomedCode", + "TherapeuticMoietyName" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE ($VTM_SNOMED_CODE IS NOT NULL AND "TherapeuticMoietySnomedCode" = $VTM_SNOMED_CODE) + OR ($VMP_SNOMED_CODE IS NOT NULL AND "MedicinalLatestSnomedCode" = $VMP_SNOMED_CODE) + OR ($VMP_SNOMED_CODE IS NOT NULL AND "ProductSnomedCode" = $VMP_SNOMED_CODE) +), +practices AS ( + -- Restricts output to active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +) +SELECT + gpm."ProcessingPeriodDate" AS "PeriodDate", + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + prod."TherapeuticMoietyName", + COUNT(DISTINCT gpm."PatientPseudonym") AS "Patients", + SUM(gpm."ItemCount") AS "Items", + SUM(gpm."PaidQuantity") AS "PaidQuantity" +FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" gpm +INNER JOIN products prod + ON gpm."PaiddmdCode" = prod."ProductSnomedCode" +INNER JOIN practices p + ON gpm."CostCentreODSCode" = p."PracticeCode" +WHERE gpm."ProcessingPeriodDate" BETWEEN $START_PERIOD AND $END_PERIOD + AND gpm."PatientPseudonym" IS NOT NULL +GROUP BY + gpm."ProcessingPeriodDate", + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + prod."TherapeuticMoietyName" +ORDER BY + "PeriodDate", + "PracticeName", + 
prod."TherapeuticMoietyName"; diff --git a/01_medicine_lookups/medicine_reference_lookup.sql b/01_medicine_lookups/medicine_reference_lookup.sql new file mode 100644 index 0000000..c45beaf --- /dev/null +++ b/01_medicine_lookups/medicine_reference_lookup.sql @@ -0,0 +1,49 @@ +/* + Medicine reference lookup + ========================= + + Purpose: + Search DATA_HUB.DWH."DimMedicineAndDevice" for dm+d products and their + related BNF, VTM, VMP, AMP, and pack-level identifiers. + + Common uses: + - Find the SNOMED product codes to use in a prescribing or dispensing query. + - Check whether a medicine group is better captured by BNF, VTM, VMP, or + explicit product codes. + - Understand how products roll up through the dm+d hierarchy. + + Replace the SEARCH_TEXT and optional BNF_PREFIX before running. +*/ + +SET SEARCH_TEXT = 'tirzepatide'; +SET BNF_PREFIX = NULL; + +SELECT DISTINCT + med."ProductSnomedCode", + med."ProductDescription", + med."ProductLevel", + med."TherapeuticMoietySnomedCode", + med."TherapeuticMoietyName", + med."MedicinalLatestSnomedCode", + med."ParentPresentationSnomedCode", + med."BNFCode", + med."BNFParagraphCode", + med."RouteName", + med."StrengthDescription", + med."PackUnitDescription", + med."IndicativePricePerUnit", + med."DrugTariffPricePerUnit" +FROM DATA_HUB.DWH."DimMedicineAndDevice" med +WHERE ( + LOWER(med."ProductDescription") LIKE '%' || LOWER($SEARCH_TEXT) || '%' + OR LOWER(med."TherapeuticMoietyName") LIKE '%' || LOWER($SEARCH_TEXT) || '%' + OR med."ProductSnomedCode" = $SEARCH_TEXT + OR med."TherapeuticMoietySnomedCode" = $SEARCH_TEXT + OR med."MedicinalLatestSnomedCode" = $SEARCH_TEXT +) + AND ($BNF_PREFIX IS NULL OR med."BNFCode" LIKE $BNF_PREFIX || '%') +ORDER BY + med."TherapeuticMoietyName", + med."MedicinalLatestSnomedCode", + med."ProductLevel", + med."ProductDescription"; diff --git a/01_medicine_lookups/prescribing_by_vmp.sql b/01_medicine_lookups/prescribing_by_vmp.sql new file mode 100644 index 0000000..3b3f859 --- 
/dev/null +++ b/01_medicine_lookups/prescribing_by_vmp.sql @@ -0,0 +1,71 @@ +/* + Current prescribing by VMP + ========================== + + Purpose: + Return patient-level prescribing records for all products under one + Virtual Medicinal Product (VMP). + + Use this when a VTM is too broad and you need a specific formulation, + strength, or medicinal product family. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET VMP_SNOMED_CODE = 'REPLACE_WITH_VMP_SNOMED_CODE'; + +WITH products AS ( + -- Expands one VMP to all matching product SNOMED codes in the medicine dimension. + SELECT DISTINCT + "ProductSnomedCode", + "ProductDescription", + "ProductLevel", + "MedicinalLatestSnomedCode", + "TherapeuticMoietyName", + "BNFCode", + "BNFParagraphCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "MedicinalLatestSnomedCode" = $VMP_SNOMED_CODE + OR "ProductSnomedCode" = $VMP_SNOMED_CODE +), +practices AS ( + -- Restricts output to active Norfolk and Suffolk parent GP practices. 
+ SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +) +SELECT DISTINCT + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + rx."PersonKey", + rx."DateMedicationStart", + prod."ProductDescription", + prod."ProductLevel", + prod."TherapeuticMoietyName", + prod."BNFCode", + TRY_CAST(rx."Quantity" AS FLOAT) AS "Quantity", + rx."QuantityUnit", + rx."Directions", + rx."SourceSystem" +FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx +INNER JOIN products prod + ON rx."SNOMEDCode" = prod."ProductSnomedCode" +INNER JOIN practices p + ON rx."CurrentGeneralPractice" = p."PracticeCode" +WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day + AND rx."PersonKey" IS NOT NULL +ORDER BY + p."PracticeName", + rx."PersonKey", + rx."DateMedicationStart"; diff --git a/01_medicine_lookups/prescribing_by_vtm.sql b/01_medicine_lookups/prescribing_by_vtm.sql new file mode 100644 index 0000000..4a252f7 --- /dev/null +++ b/01_medicine_lookups/prescribing_by_vtm.sql @@ -0,0 +1,74 @@ +/* + Current prescribing by VTM + ========================= + + Purpose: + Return patient-level prescribing records for all products under one + Virtual Therapeutic Moiety (VTM). + + Data source: + REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" + + Notes: + - VTM is useful when you want all products containing the same therapeutic + moiety, regardless of brand, strength, or pack. + - The practice CTE is deliberately visible. Change the geography filter + there if the report should not be Norfolk and Suffolk. 
+*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET VTM_SNOMED_CODE = 'REPLACE_WITH_VTM_SNOMED_CODE'; + +WITH products AS ( + -- Expands one VTM to all matching product SNOMED codes in the medicine dimension. + SELECT DISTINCT + "ProductSnomedCode", + "ProductDescription", + "TherapeuticMoietyName", + "BNFCode", + "BNFParagraphCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "TherapeuticMoietySnomedCode" = $VTM_SNOMED_CODE +), +practices AS ( + -- Restricts output to active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +) +SELECT DISTINCT + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + rx."PersonKey", + rx."DateMedicationStart", + prod."ProductDescription", + prod."TherapeuticMoietyName", + prod."BNFCode", + TRY_CAST(rx."Quantity" AS FLOAT) AS "Quantity", + rx."QuantityUnit", + rx."Directions", + rx."IsRepeatPrescription", + rx."SourceSystem" +FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx +INNER JOIN products prod + ON rx."SNOMEDCode" = prod."ProductSnomedCode" +INNER JOIN practices p + ON rx."CurrentGeneralPractice" = p."PracticeCode" +WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day + AND rx."PersonKey" IS NOT NULL +ORDER BY + p."PracticeName", + rx."PersonKey", + rx."DateMedicationStart"; diff --git a/01_medicine_lookups/prescribing_for_patient_pseudonym.sql b/01_medicine_lookups/prescribing_for_patient_pseudonym.sql new file mode 100644 index 0000000..4f1171c --- /dev/null +++ b/01_medicine_lookups/prescribing_for_patient_pseudonym.sql @@ -0,0 +1,41 @@ +/* + Prescribing for a patient pseudonym + 
=================================== + + Purpose: + Pull a concise prescribing history for one pseudonym. + + Use carefully: + - Do not commit real patient pseudonyms into shared repos. + - Keep row-level extracts out of git and shared folders unless there is a + clear information governance basis. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET PATIENT_PSEUDONYM = 'REPLACE_WITH_PATIENT_PSEUDONYM'; + +SELECT DISTINCT + dp."PersonKey", + dp."PatientPseudonym", + gp."OrganisationName" AS "CurrentGeneralPracticeName", + rx."DateMedicationStart", + med."ProductDescription", + med."TherapeuticMoietyName", + med."BNFCode", + TRY_CAST(rx."Quantity" AS FLOAT) AS "Quantity", -- NULL instead of a failed query when a quantity is not numeric + rx."QuantityUnit", + rx."Directions", + rx."IsRepeatPrescription", + rx."SourceSystem" +FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx +INNER JOIN DATA_HUB.DWH."DimPerson" dp + ON rx."PersonKey" = dp."PersonKey" +LEFT JOIN DATA_HUB.DWH."DimMedicineAndDevice" med + ON rx."SNOMEDCode" = med."ProductSnomedCode" +LEFT JOIN DATA_HUB.DWH."DimOrganisationAndSite" gp + ON rx."CurrentGeneralPractice" = gp."OrganisationCode" + AND gp."SiteCode" = gp."OrganisationCode" +WHERE dp."PatientPseudonym" = $PATIENT_PSEUDONYM + AND rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day +ORDER BY rx."DateMedicationStart", med."ProductDescription"; diff --git a/02_prescribing_analysis/high_prescribing_practices_quintile_template.sql b/02_prescribing_analysis/high_prescribing_practices_quintile_template.sql new file mode 100644 index 0000000..7b563b1 --- /dev/null +++ b/02_prescribing_analysis/high_prescribing_practices_quintile_template.sql @@ -0,0 +1,187 @@ +/* + High-prescribing practice quintile template + =========================================== + + Purpose: + Identify practices in the highest quintile for one or more prescribing + measures after standardising by registered population. 
+ + Based on the antidepressant high-prescribing analysis, but made generic: + - Replace medicine_products with the medicine definition you need. + - Add/remove measures in practice_counts and final_select. + - Keep the practice and base_population CTEs visible so geography and + denominator decisions are explicit. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET BNF_PREFIX = 'REPLACE_WITH_BNF_PREFIX'; +SET REPORT_TITLE = 'REPLACE_WITH_REPORT_NAME'; + +WITH practices AS ( + -- Practice list and hierarchy columns used in the final report. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName", + "INTName", + "RegisteredPopulation" AS "OrganisationRegisteredPopulation", + "RegisteredPopulationSnapshotDate" AS "OrganisationPopulationSnapshotDate" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +base_population AS ( + -- Counted denominator: active, known, living patients currently registered to the practice. + SELECT + p."PracticeCode", + dp."PersonKey", + dp."PatientPseudonym", + dp."CurrentAge", + dp."BAME" + FROM DATA_HUB.DWH."DimPerson" dp + INNER JOIN practices p + ON dp."CurrentGeneralPractice" = p."PracticeCode" + WHERE dp."RecordStatus" = 'Active' + AND dp."PersonStatus" = 'Known' + AND dp."CurrentGeneralPractice" IS NOT NULL + AND dp."CurrentGeneralPractice" <> '*' + AND dp."YearMonthDeath" IS NULL +), +practice_population AS ( + SELECT + "PracticeCode", + COUNT(DISTINCT "PersonKey") AS "RegisteredPatients" + FROM base_population + GROUP BY "PracticeCode" +), +medicine_products AS ( + -- Replace this with a VTM/VMP/explicit-code definition if BNF is too broad. 
+ SELECT DISTINCT + "ProductSnomedCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "ProductSnomedCode" IS NOT NULL + AND "BNFCode" LIKE $BNF_PREFIX || '%' +), +medicine_patients AS ( + -- Patient-level numerator cohort before practice aggregation. + SELECT DISTINCT + bp."PracticeCode", + bp."PersonKey", + bp."PatientPseudonym", + bp."CurrentAge", + bp."BAME" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + INNER JOIN base_population bp + ON rx."PersonKey" = bp."PersonKey" + INNER JOIN medicine_products mp + ON rx."SNOMEDCode" = mp."ProductSnomedCode" + WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day + AND rx."PersonKey" IS NOT NULL +), +practice_counts AS ( + -- Add or remove measure columns here, then mirror them in final_select. + SELECT + "PracticeCode", + COUNT(DISTINCT "PersonKey") AS "PatientsOnMedicine", + COUNT(DISTINCT CASE WHEN "CurrentAge" > 65 THEN "PersonKey" END) AS "PatientsOnMedicineAgedOver65", + COUNT(DISTINCT CASE WHEN "CurrentAge" > 85 THEN "PersonKey" END) AS "PatientsOnMedicineAgedOver85", + COUNT(DISTINCT CASE WHEN "BAME" = 'BAME' THEN "PersonKey" END) AS "PatientsOnMedicineEthnicMinority" + FROM medicine_patients + GROUP BY "PracticeCode" +), +final_select AS ( + -- Per-1000 rates use the counted patient denominator from base_population. 
+ SELECT + $REPORT_TITLE AS "ReportTitle", + $START_DATE::DATE AS "PeriodStartDate", + $END_DATE::DATE AS "PeriodEndDate", + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + p."INTName", + pop."RegisteredPatients", + p."OrganisationRegisteredPopulation", + p."OrganisationPopulationSnapshotDate", + COALESCE(pc."PatientsOnMedicine", 0) AS "PatientsOnMedicine", + ROUND(1000.0 * COALESCE(pc."PatientsOnMedicine", 0) / NULLIF(pop."RegisteredPatients", 0), 2) AS "PatientsOnMedicinePer1000", + COALESCE(pc."PatientsOnMedicineAgedOver65", 0) AS "PatientsOnMedicineAgedOver65", + ROUND(1000.0 * COALESCE(pc."PatientsOnMedicineAgedOver65", 0) / NULLIF(pop."RegisteredPatients", 0), 2) AS "PatientsOnMedicineAgedOver65Per1000", + COALESCE(pc."PatientsOnMedicineAgedOver85", 0) AS "PatientsOnMedicineAgedOver85", + ROUND(1000.0 * COALESCE(pc."PatientsOnMedicineAgedOver85", 0) / NULLIF(pop."RegisteredPatients", 0), 2) AS "PatientsOnMedicineAgedOver85Per1000", + COALESCE(pc."PatientsOnMedicineEthnicMinority", 0) AS "PatientsOnMedicineEthnicMinority", + ROUND(1000.0 * COALESCE(pc."PatientsOnMedicineEthnicMinority", 0) / NULLIF(pop."RegisteredPatients", 0), 2) AS "PatientsOnMedicineEthnicMinorityPer1000" + FROM practices p + INNER JOIN practice_population pop + ON p."PracticeCode" = pop."PracticeCode" + LEFT JOIN practice_counts pc + ON p."PracticeCode" = pc."PracticeCode" +), +with_quintiles AS ( + -- Quintile 5 is the highest rate in each measure. 
+ SELECT + "ReportTitle", + "PeriodStartDate", + "PeriodEndDate", + "PracticeCode", + "PracticeName", + "PCNName", + "PlaceName", + "AllianceName", + "INTName", + "RegisteredPatients", + "OrganisationRegisteredPopulation", + "OrganisationPopulationSnapshotDate", + "PatientsOnMedicine", + "PatientsOnMedicinePer1000", + "PatientsOnMedicineAgedOver65", + "PatientsOnMedicineAgedOver65Per1000", + "PatientsOnMedicineAgedOver85", + "PatientsOnMedicineAgedOver85Per1000", + "PatientsOnMedicineEthnicMinority", + "PatientsOnMedicineEthnicMinorityPer1000", + NTILE(5) OVER (ORDER BY "PatientsOnMedicinePer1000", "PracticeCode") AS "PatientsOnMedicineQuintile", -- PracticeCode breaks rate ties so reruns assign the same quintiles + NTILE(5) OVER (ORDER BY "PatientsOnMedicineAgedOver65Per1000", "PracticeCode") AS "AgedOver65Quintile", + NTILE(5) OVER (ORDER BY "PatientsOnMedicineAgedOver85Per1000", "PracticeCode") AS "AgedOver85Quintile", + NTILE(5) OVER (ORDER BY "PatientsOnMedicineEthnicMinorityPer1000", "PracticeCode") AS "EthnicMinorityQuintile" + FROM final_select +) +SELECT + "ReportTitle", + "PeriodStartDate", + "PeriodEndDate", + "PracticeCode", + "PracticeName", + "PCNName", + "PlaceName", + "AllianceName", + "INTName", + "RegisteredPatients", + "OrganisationRegisteredPopulation", + "OrganisationPopulationSnapshotDate", + "PatientsOnMedicine", + "PatientsOnMedicinePer1000", + "PatientsOnMedicineQuintile", + "PatientsOnMedicineAgedOver65", + "PatientsOnMedicineAgedOver65Per1000", + "AgedOver65Quintile", + "PatientsOnMedicineAgedOver85", + "PatientsOnMedicineAgedOver85Per1000", + "AgedOver85Quintile", + "PatientsOnMedicineEthnicMinority", + "PatientsOnMedicineEthnicMinorityPer1000", + "EthnicMinorityQuintile" +FROM with_quintiles +WHERE "PatientsOnMedicineQuintile" = 5 + OR "AgedOver65Quintile" = 5 + OR "AgedOver85Quintile" = 5 + OR "EthnicMinorityQuintile" = 5 +ORDER BY + "PatientsOnMedicinePer1000" DESC, + "PracticeName"; diff --git a/02_prescribing_analysis/practice_level_bnf_prescribing_summary.sql b/02_prescribing_analysis/practice_level_bnf_prescribing_summary.sql new file mode 100644 index 
0000000..6a35a19 --- /dev/null +++ b/02_prescribing_analysis/practice_level_bnf_prescribing_summary.sql @@ -0,0 +1,79 @@ +/* + Practice-level prescribing summary by BNF prefix + ================================================ + + Purpose: + Produce a compact practice/month summary for prescribing under a BNF prefix. + + Good first query for: + - "How much prescribing activity is there for this BNF section?" + - "Which practices have the most patients/items/cost for this medicine area?" + - A simple numerator for charts or practice packs. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET BNF_PREFIX = '0403'; + +WITH practices AS ( + -- Default reporting geography: active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +prescribing AS ( + -- Aggregate after joining to the medicine dimension so BNF matching is dm+d-backed. 
+ SELECT + DATE_TRUNC('MONTH', rx."DateMedicationStart")::DATE AS "MonthStartDate", + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + med."BNFParagraphCode", + COUNT(DISTINCT rx."PersonKey") AS "Patients", + COUNT(*) AS "PrescriptionRows", + SUM(TRY_CAST(rx."Quantity" AS FLOAT)) AS "TotalQuantity", + SUM(rx."EstPrice") AS "EstimatedCost" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" med + ON rx."SNOMEDCode" = med."ProductSnomedCode" + AND med."BNFCode" LIKE $BNF_PREFIX || '%' + INNER JOIN practices p + ON rx."CurrentGeneralPractice" = p."PracticeCode" + WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day + AND rx."PersonKey" IS NOT NULL + GROUP BY + DATE_TRUNC('MONTH', rx."DateMedicationStart")::DATE, + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + med."BNFParagraphCode" +) +SELECT + "MonthStartDate", + "PracticeCode", + "PracticeName", + "PCNName", + "PlaceName", + "AllianceName", + "BNFParagraphCode", + "Patients", + "PrescriptionRows", + "TotalQuantity", + "EstimatedCost" +FROM prescribing +ORDER BY + "MonthStartDate", + "PracticeName", + "BNFParagraphCode"; diff --git a/02_prescribing_analysis/prescribing_spend_by_patient_template.sql b/02_prescribing_analysis/prescribing_spend_by_patient_template.sql new file mode 100644 index 0000000..0881c61 --- /dev/null +++ b/02_prescribing_analysis/prescribing_spend_by_patient_template.sql @@ -0,0 +1,99 @@ +/* + Prescribing spend by patient template + ===================================== + + Purpose: + Rank patients by estimated prescribing cost over a selected period and + medicine definition. + + Use this when looking for high-cost patients or checking the shape of cost + concentration before doing deeper clinical review. 
+ + Default source is the maintained unified prescribing table, which already + includes parsed quantity and estimated price. If that table is unavailable + or needs rebuilding, see 06_advanced_methods/product_price_and_quantity_parsing_template.sql. + + Keep at least one medicine filter set. Leaving BNF, VTM, and VMP filters all + NULL leaves the products CTE empty, so the query returns no rows. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET BNF_PREFIX = '0403'; +SET VTM_SNOMED_CODE = NULL; +SET VMP_SNOMED_CODE = NULL; +SET TOP_N = 100; + +WITH products AS ( + -- Keep at least one of the filters below populated so the product set is intentional. + -- Only the join key is selected: patient_costs uses nothing else from this CTE, and + -- DISTINCT over extra descriptive columns would return a product code once per + -- differing description/BNF value (if the dimension holds such rows), multiplying + -- PrescriptionRows and EstimatedCost in the join below. + SELECT DISTINCT + "ProductSnomedCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE ($BNF_PREFIX IS NOT NULL AND "BNFCode" LIKE $BNF_PREFIX || '%') + OR ($VTM_SNOMED_CODE IS NOT NULL AND "TherapeuticMoietySnomedCode" = $VTM_SNOMED_CODE) + OR ($VMP_SNOMED_CODE IS NOT NULL AND "MedicinalLatestSnomedCode" = $VMP_SNOMED_CODE) +), +patient_costs AS ( + -- Aggregate first at patient/practice level so ranking is not inflated by joins. + SELECT + rx."PersonKey", + rx."CurrentGeneralPractice" AS "PracticeCode", + COUNT(*) AS "PrescriptionRows", + COUNT(DISTINCT prod."ProductSnomedCode") AS "DistinctProducts", + SUM(COALESCE(rx."EstPrice", 0)) AS "EstimatedCost" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + INNER JOIN products prod + ON rx."SNOMEDCode" = prod."ProductSnomedCode" + WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) -- half-open range: keeps all of END_DATE even if the column carries a time of day + AND rx."PersonKey" IS NOT NULL + GROUP BY rx."PersonKey", rx."CurrentGeneralPractice" +), +practice_lookup AS ( + -- De-duplicated practice labels prevent site-level duplicates in the ranking output. 
+ SELECT + "OrganisationCode" AS "PracticeCode", + MIN("OrganisationName") AS "PracticeName", + MIN("PCNName") AS "PCNName", + MIN("PlaceName") AS "PlaceName", + MIN("AllianceName") AS "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" + GROUP BY "OrganisationCode" +), +ranked AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY pc."EstimatedCost" DESC NULLS LAST, pc."PersonKey", pc."PracticeCode") AS "Rank", -- key columns break cost ties so the TOP_N cut-off is repeatable + gp."PracticeName", + gp."PCNName", + gp."PlaceName", + gp."AllianceName", + pc."PersonKey", + pc."PracticeCode", + pc."PrescriptionRows", + pc."DistinctProducts", + pc."EstimatedCost" + FROM patient_costs pc + INNER JOIN practice_lookup gp + ON pc."PracticeCode" = gp."PracticeCode" +) +SELECT + "Rank", + "PracticeName", + "PCNName", + "PlaceName", + "AllianceName", + "PersonKey", + "PracticeCode", + "PrescriptionRows", + "DistinctProducts", + "EstimatedCost" +FROM ranked +WHERE "Rank" <= $TOP_N +ORDER BY "Rank"; diff --git a/03_cohorts_and_clinical_coding/cluster_code_lookup.sql b/03_cohorts_and_clinical_coding/cluster_code_lookup.sql new file mode 100644 index 0000000..977fb2c --- /dev/null +++ b/03_cohorts_and_clinical_coding/cluster_code_lookup.sql @@ -0,0 +1,20 @@ +/* + Clinical coding cluster lookup + ============================== + + Purpose: + List SNOMED codes attached to maintained clinical coding clusters. + + Use maintained clusters where possible instead of free-text matching on + SNOMED descriptions. 
+*/ + +SET CLUSTER_ID = 'DEPR_COD'; + +SELECT DISTINCT + ccs."Cluster_ID", + ccs."SNOMEDCode", + ccs."SNOMEDDescription" +FROM DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes" ccs +WHERE ccs."Cluster_ID" = $CLUSTER_ID +ORDER BY ccs."SNOMEDDescription", ccs."SNOMEDCode"; diff --git a/03_cohorts_and_clinical_coding/monthly_clinical_event_count_by_practice.sql b/03_cohorts_and_clinical_coding/monthly_clinical_event_count_by_practice.sql new file mode 100644 index 0000000..37899a4 --- /dev/null +++ b/03_cohorts_and_clinical_coding/monthly_clinical_event_count_by_practice.sql @@ -0,0 +1,102 @@ +/* + Monthly clinical event count by practice + ======================================== + + Purpose: + Count distinct patients with a clinical code by month and practice, while + returning zero rows for months/practices with no events. + + Good for: + - SMR-type activity counts. + - Condition or review-code monitoring. + - Building a complete month/practice grid for charts. +*/ + +SET START_MONTH = '2024-07-01'; +SET END_MONTH = CURRENT_DATE(); +SET CLUSTER_ID = 'REPLACE_WITH_CLUSTER_ID'; + +WITH clinical_codes AS ( + -- Maintained cluster definitions avoid fragile free-text SNOMED searches. + SELECT DISTINCT "SNOMEDCode" + FROM DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes" + WHERE "Cluster_ID" = $CLUSTER_ID +), +practices AS ( + -- Default to active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +base_population AS ( + -- Keeps the denominator/cohort rule visible before events are counted. 
+ SELECT + p."PracticeCode", + dp."PersonKey", + dp."PatientPseudonym" + FROM DATA_HUB.DWH."DimPerson" dp + INNER JOIN practices p + ON dp."CurrentGeneralPractice" = p."PracticeCode" + WHERE dp."RecordStatus" = 'Active' + AND dp."PersonStatus" = 'Known' + AND dp."YearMonthDeath" IS NULL +), +month_numbers AS ( + -- Generates up to 120 monthly rows; extend if a longer history is needed. + SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS n + FROM TABLE(GENERATOR(ROWCOUNT => 120)) +), +months AS ( + SELECT DATE_TRUNC('MONTH', DATEADD(MONTH, n, $START_MONTH::DATE))::DATE AS "MonthStartDate" + FROM month_numbers + WHERE DATEADD(MONTH, n, $START_MONTH::DATE) <= $END_MONTH::DATE +), +practice_month_grid AS ( + SELECT + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + m."MonthStartDate" + FROM practices p + CROSS JOIN months m +), +events AS ( + SELECT + bp."PracticeCode", + DATE_TRUNC('MONTH', cc."EventDateTime")::DATE AS "MonthStartDate", + COUNT(DISTINCT bp."PersonKey") AS "PatientsWithEvent" + FROM DATA_HUB.PHM."PrimaryCareClinicalCoding" cc + INNER JOIN clinical_codes c + ON cc."SNOMEDCode" = c."SNOMEDCode" + INNER JOIN base_population bp + ON cc."PatientPseudonym" = bp."PatientPseudonym" + WHERE cc."EventDateTime"::DATE BETWEEN $START_MONTH::DATE AND $END_MONTH::DATE + GROUP BY + bp."PracticeCode", + DATE_TRUNC('MONTH', cc."EventDateTime")::DATE +) +SELECT + pmg."PracticeCode", + pmg."PracticeName", + pmg."PCNName", + pmg."PlaceName", + pmg."AllianceName", + pmg."MonthStartDate", + COALESCE(e."PatientsWithEvent", 0) AS "PatientsWithEvent" +FROM practice_month_grid pmg +LEFT JOIN events e + ON pmg."PracticeCode" = e."PracticeCode" + AND pmg."MonthStartDate" = e."MonthStartDate" +ORDER BY + pmg."PracticeName", + pmg."MonthStartDate"; diff --git a/03_cohorts_and_clinical_coding/prescribing_plus_clinical_code_cohort.sql b/03_cohorts_and_clinical_coding/prescribing_plus_clinical_code_cohort.sql new file mode 100644 index 
0000000..fd52ca3 --- /dev/null +++ b/03_cohorts_and_clinical_coding/prescribing_plus_clinical_code_cohort.sql @@ -0,0 +1,114 @@ +/* + Cohort from prescribing plus clinical coding + ============================================ + + Purpose: + Build a patient cohort where a medicine exposure is combined with a clinical + coding condition, for example patients prescribed a medicine who also have + a relevant diagnosis code in a lookback window. + + Replace: + - BNF_PREFIX or use a VTM/VMP product CTE. + - CLUSTER_ID with the maintained cluster needed for the clinical condition. +*/ + +SET PRESCRIBING_START_DATE = '2025-04-01'; +SET PRESCRIBING_END_DATE = '2026-03-31'; +SET CLINICAL_LOOKBACK_YEARS = -2; +SET BNF_PREFIX = '0403'; +SET CLUSTER_ID = 'DEPR_COD'; + +WITH practices AS ( + -- Default reporting geography: active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName", + "PCNName", + "PlaceName", + "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +base_population AS ( + -- Active, known, living patients registered to the selected practices. + SELECT + p."PracticeCode", + p."PracticeName", + p."PCNName", + p."PlaceName", + p."AllianceName", + dp."PersonKey", + dp."PatientPseudonym", + dp."CurrentAge" + FROM DATA_HUB.DWH."DimPerson" dp + INNER JOIN practices p + ON dp."CurrentGeneralPractice" = p."PracticeCode" + WHERE dp."RecordStatus" = 'Active' + AND dp."PersonStatus" = 'Known' + AND dp."YearMonthDeath" IS NULL +), +medicine_products AS ( + -- Replace with VTM/VMP/explicit product logic if BNF prefix is too broad. 
+ SELECT DISTINCT "ProductSnomedCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "BNFCode" LIKE $BNF_PREFIX || '%' +), +clinical_codes AS ( + -- Maintained cluster definitions avoid fragile free-text SNOMED searches. + SELECT DISTINCT "SNOMEDCode" + FROM DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes" + WHERE "Cluster_ID" = $CLUSTER_ID +), +prescribed_patients AS ( + -- Medicine-exposed patients in the prescribing window; the half-open range keeps the whole end date even if the column carries a time. + SELECT DISTINCT + bp."PracticeCode", + bp."PracticeName", + bp."PCNName", + bp."PlaceName", + bp."AllianceName", + bp."PersonKey", + bp."PatientPseudonym", + bp."CurrentAge" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + INNER JOIN base_population bp + ON rx."PersonKey" = bp."PersonKey" + INNER JOIN medicine_products mp + ON rx."SNOMEDCode" = mp."ProductSnomedCode" + WHERE rx."DateMedicationStart" >= $PRESCRIBING_START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $PRESCRIBING_END_DATE::DATE) +), +coded_patients AS ( + -- Look back from the end of the prescribing period for matching clinical events. 
+ SELECT + pp."PersonKey", + MIN(cc."EventDateTime"::DATE) AS "FirstMatchingClinicalEventDate", + MAX(cc."EventDateTime"::DATE) AS "LatestMatchingClinicalEventDate" + FROM prescribed_patients pp + INNER JOIN DATA_HUB.PHM."PrimaryCareClinicalCoding" cc + ON pp."PatientPseudonym" = cc."PatientPseudonym" + INNER JOIN clinical_codes c + ON cc."SNOMEDCode" = c."SNOMEDCode" + WHERE cc."EventDateTime"::DATE BETWEEN DATEADD('YEAR', $CLINICAL_LOOKBACK_YEARS, $PRESCRIBING_END_DATE::DATE) + AND $PRESCRIBING_END_DATE::DATE + GROUP BY pp."PersonKey" +) +SELECT + pp."PracticeCode", + pp."PracticeName", + pp."PCNName", + pp."PlaceName", + pp."AllianceName", + pp."PersonKey", + pp."CurrentAge", + CASE WHEN cp."PersonKey" IS NOT NULL THEN 1 ELSE 0 END AS "HasClinicalCodeInLookback", + cp."FirstMatchingClinicalEventDate", + cp."LatestMatchingClinicalEventDate" +FROM prescribed_patients pp +LEFT JOIN coded_patients cp + ON pp."PersonKey" = cp."PersonKey" +ORDER BY + pp."PracticeName", + pp."PersonKey"; diff --git a/04_rolling_and_pqs/baseline_vs_evaluation_template.sql b/04_rolling_and_pqs/baseline_vs_evaluation_template.sql new file mode 100644 index 0000000..3767524 --- /dev/null +++ b/04_rolling_and_pqs/baseline_vs_evaluation_template.sql @@ -0,0 +1,138 @@ +/* + Baseline vs rolling evaluation template + ======================================= + + Purpose: + Compare a fixed baseline cohort with rolling evaluation periods. + + Typical use: + - A medicines optimisation search where the baseline is fixed. + - Later periods check whether the same patients still meet criteria. + - Output is practice x period, preserving the baseline count. + + Replace medicine_products and the evaluation criteria before use. 
+*/ + +SET BASELINE_START = '2025-04-01'; +SET BASELINE_END = '2025-06-30'; +SET FIRST_EVALUATION_START = '2025-04-01'; +SET EVALUATION_MONTHS = 3; +SET BNF_PREFIX = 'REPLACE_WITH_BNF_PREFIX'; + +WITH latest_prescribing_date AS ( + -- Caps generated periods to data already present in the prescribing table. + SELECT MAX("DateMedicationStart")::DATE AS "MaxDate" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" + WHERE "DateMedicationStart" <= CURRENT_DATE() +), +periods AS ( + -- Generates candidate month starts; increase ROWCOUNT for longer projects. + SELECT + ROW_NUMBER() OVER (ORDER BY SEQ4()) AS "PeriodNumber", + DATEADD(MONTH, ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1, $FIRST_EVALUATION_START::DATE)::DATE AS "PeriodStartDate" + FROM TABLE(GENERATOR(ROWCOUNT => 48)) +), +evaluation_periods AS ( + SELECT + "PeriodNumber", + "PeriodStartDate", + LAST_DAY(DATEADD(MONTH, $EVALUATION_MONTHS - 1, "PeriodStartDate"))::DATE AS "PeriodEndDate" + FROM periods + CROSS JOIN latest_prescribing_date + WHERE "PeriodStartDate" <= "MaxDate" +), +date_bounds AS ( + SELECT + $BASELINE_START::DATE AS "MinEventDate", + GREATEST($BASELINE_END::DATE, COALESCE(MAX("PeriodEndDate"), $BASELINE_END::DATE)) AS "MaxEventDate" + FROM evaluation_periods +), +practices AS ( + -- Default to active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "PracticeName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +medicine_products AS ( + -- Replace this CTE for VTM, VMP, explicit SNOMED, or cluster-based definitions. + SELECT DISTINCT "ProductSnomedCode" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "BNFCode" LIKE $BNF_PREFIX || '%' +), +medicine_events AS ( + -- Pull the smallest event window needed for both baseline and evaluation. 
+ SELECT DISTINCT + rx."PersonKey", + rx."CurrentGeneralPractice" AS "PracticeCode", + rx."DateMedicationStart"::DATE AS "DateMedicationStart" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + CROSS JOIN date_bounds db + INNER JOIN medicine_products mp + ON rx."SNOMEDCode" = mp."ProductSnomedCode" + WHERE rx."PersonKey" IS NOT NULL + AND rx."CurrentGeneralPractice" IS NOT NULL + AND rx."DateMedicationStart" >= db."MinEventDate" AND rx."DateMedicationStart" < DATEADD(DAY, 1, db."MaxEventDate") +), +baseline_cohort AS ( + -- Fixes each patient to the practice recorded during the baseline window (event dates are whole days, so the end date is inclusive). + SELECT DISTINCT + me."PersonKey", + me."PracticeCode" + FROM medicine_events me + WHERE me."DateMedicationStart" BETWEEN $BASELINE_START::DATE AND $BASELINE_END::DATE +), +baseline_counts AS ( + SELECT + "PracticeCode", + COUNT(DISTINCT "PersonKey") AS "BaselineCount" + FROM baseline_cohort + GROUP BY "PracticeCode" +), +practice_periods AS ( + -- Ensures every active practice returns one row per evaluation period. + SELECT + p."PracticeCode", + p."PracticeName", + ep."PeriodNumber", + ep."PeriodStartDate", + ep."PeriodEndDate" + FROM practices p + CROSS JOIN evaluation_periods ep +), +evaluation_counts AS ( + SELECT + bc."PracticeCode", + ep."PeriodNumber", + ep."PeriodStartDate", + ep."PeriodEndDate", + COUNT(DISTINCT me."PersonKey") AS "EvaluationCount" + FROM baseline_cohort bc + CROSS JOIN evaluation_periods ep + LEFT JOIN medicine_events me + ON me."PersonKey" = bc."PersonKey" + AND me."DateMedicationStart" BETWEEN ep."PeriodStartDate" AND ep."PeriodEndDate" + GROUP BY bc."PracticeCode", ep."PeriodNumber", ep."PeriodStartDate", ep."PeriodEndDate" +) +SELECT + pp."PracticeCode", + pp."PracticeName", + pp."PeriodNumber", + pp."PeriodStartDate", + pp."PeriodEndDate", + COALESCE(bc."BaselineCount", 0) AS "BaselineCount", + COALESCE(ec."EvaluationCount", 0) AS "EvaluationCount", + COALESCE(bc."BaselineCount", 0) - COALESCE(ec."EvaluationCount", 0) AS "ReductionFromBaseline" +FROM practice_periods pp +LEFT JOIN baseline_counts 
bc + ON pp."PracticeCode" = bc."PracticeCode" +LEFT JOIN evaluation_counts ec + ON pp."PracticeCode" = ec."PracticeCode" + AND pp."PeriodNumber" = ec."PeriodNumber" +ORDER BY + pp."PracticeName", + pp."PeriodNumber"; diff --git a/04_rolling_and_pqs/dual_source_long_format_measure_template.sql b/04_rolling_and_pqs/dual_source_long_format_measure_template.sql new file mode 100644 index 0000000..9dfe181 --- /dev/null +++ b/04_rolling_and_pqs/dual_source_long_format_measure_template.sql @@ -0,0 +1,147 @@ +/* + Dual-source long-format measure template + ======================================== + + Purpose: + Produce the same measure from dispensing and prescribing data, then return + a tidy long output: + + OrganisationName, PeriodStartDate, PeriodEndDate, DataSource, + Measure, Indicator, Value + + Why this shape: + - Easy to append multiple measures. + - Easy to chart in Excel or Power BI. + - Avoids changing columns every time a new indicator is added. + + Replace the BNF filter and measure-specific calculations before use. +*/ + +SET FIRST_PERIOD_END_DATE = '2025-06-30'; +SET LAST_PERIOD_END_DATE = LAST_DAY(DATEADD('MONTH', -1, CURRENT_DATE())); +SET LOOKBACK_MONTHS = 12; +SET BNF_PREFIX = '0501'; +SET MEASURE_ID = 'M_REPLACE'; + +WITH RECURSIVE date_periods AS ( + -- Month-end series; cap LAST_PERIOD_END_DATE before sharing a final report. + SELECT $FIRST_PERIOD_END_DATE::DATE AS "PeriodEndDate" + UNION ALL + SELECT LAST_DAY(DATEADD(MONTH, 1, "PeriodEndDate"))::DATE AS "PeriodEndDate" + FROM date_periods + WHERE "PeriodEndDate" < $LAST_PERIOD_END_DATE::DATE +), +rolling_periods AS ( + -- Converts each period end into a fixed lookback window. 
+ SELECT + DATEADD(MONTH, 1 - $LOOKBACK_MONTHS, DATE_TRUNC('MONTH', "PeriodEndDate"))::DATE AS "PeriodStartDate", + "PeriodEndDate" + FROM date_periods +), +date_bounds AS ( + SELECT + MIN("PeriodStartDate") AS "MinPeriodStartDate", + MAX("PeriodEndDate") AS "MaxPeriodEndDate" + FROM rolling_periods +), +practices AS ( + -- Default reporting geography: active Norfolk and Suffolk parent GP practices. + SELECT DISTINCT + "OrganisationCode" AS "PracticeCode", + "OrganisationName" AS "OrganisationName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +), +practice_periods AS ( + SELECT + p."PracticeCode", + p."OrganisationName", + rp."PeriodStartDate", + rp."PeriodEndDate" + FROM practices p + CROSS JOIN rolling_periods rp +), +prescribing_events AS ( + -- Pre-filter prescribing once to keep later rolling joins smaller; dates become whole days and product codes are de-duplicated so rows cannot fan out. + SELECT + rx."PersonKey", + rx."CurrentGeneralPractice", + rx."DateMedicationStart"::DATE AS "DateMedicationStart", + rx."Quantity", + rx."EstPrice" + FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx + CROSS JOIN date_bounds db + INNER JOIN (SELECT DISTINCT "ProductSnomedCode" FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "BNFCode" LIKE $BNF_PREFIX || '%') med + ON rx."SNOMEDCode" = med."ProductSnomedCode" + WHERE rx."PersonKey" IS NOT NULL + AND rx."DateMedicationStart" >= db."MinPeriodStartDate" AND rx."DateMedicationStart" < DATEADD(DAY, 1, db."MaxPeriodEndDate") +), +dispensing_agg AS ( + -- Dispensing is official paid activity; use ProcessingPeriodDate for periods. 
+ SELECT + pp."OrganisationName", + pp."PeriodStartDate", + pp."PeriodEndDate", + COUNT(DISTINCT gpm."PatientPseudonym") AS "Patients", + COALESCE(SUM(gpm."ItemCount"), 0) AS "Items", + COALESCE(SUM(gpm."PaidQuantity"), 0) AS "Quantity" + FROM practice_periods pp + LEFT JOIN NATIONAL.GPMED."MedicinesDispensedInPrimarycare" gpm + ON pp."PracticeCode" = gpm."CostCentreODSCode" + AND gpm."ProcessingPeriodDate" BETWEEN pp."PeriodStartDate" AND pp."PeriodEndDate" + AND gpm."PaidBNFCode" LIKE $BNF_PREFIX || '%' + AND gpm."PatientPseudonym" IS NOT NULL + GROUP BY pp."OrganisationName", pp."PeriodStartDate", pp."PeriodEndDate" +), +prescribing_agg AS ( + -- Prescribing is current clinical-system activity from the unified table. + SELECT + pp."OrganisationName", + pp."PeriodStartDate", + pp."PeriodEndDate", + COUNT(DISTINCT rx."PersonKey") AS "Patients", + COUNT(rx."PersonKey") AS "Items", + COALESCE(SUM(TRY_CAST(rx."Quantity" AS FLOAT)), 0) AS "Quantity", + COALESCE(SUM(rx."EstPrice"), 0) AS "EstimatedCost" + FROM practice_periods pp + LEFT JOIN prescribing_events rx + ON pp."PracticeCode" = rx."CurrentGeneralPractice" + AND rx."DateMedicationStart" BETWEEN pp."PeriodStartDate" AND pp."PeriodEndDate" + GROUP BY pp."OrganisationName", pp."PeriodStartDate", pp."PeriodEndDate" +) +SELECT + "OrganisationName", + "PeriodStartDate", + "PeriodEndDate", + 'Dispensing' AS "DataSource", + $MEASURE_ID AS "Measure", + 'Patients' AS "Indicator", + CAST("Patients" AS FLOAT) AS "Value" +FROM dispensing_agg +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Dispensing', $MEASURE_ID, 'Items', CAST("Items" AS FLOAT) +FROM dispensing_agg +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Dispensing', $MEASURE_ID, 'Quantity', CAST("Quantity" AS FLOAT) +FROM dispensing_agg +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Prescribing', $MEASURE_ID, 'Patients', CAST("Patients" AS FLOAT) +FROM prescribing_agg +UNION ALL 
+SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Prescribing', $MEASURE_ID, 'Items', CAST("Items" AS FLOAT) +FROM prescribing_agg +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Prescribing', $MEASURE_ID, 'Quantity', CAST("Quantity" AS FLOAT) +FROM prescribing_agg +UNION ALL +SELECT "OrganisationName", "PeriodStartDate", "PeriodEndDate", 'Prescribing', $MEASURE_ID, 'EstimatedCost', CAST("EstimatedCost" AS FLOAT) +FROM prescribing_agg +ORDER BY + "PeriodEndDate", + "DataSource", + "OrganisationName", + "Indicator"; diff --git a/04_rolling_and_pqs/latest_data_dates.sql b/04_rolling_and_pqs/latest_data_dates.sql new file mode 100644 index 0000000..be8b57e --- /dev/null +++ b/04_rolling_and_pqs/latest_data_dates.sql @@ -0,0 +1,36 @@ +/* + Latest data dates + ================= + + Purpose: + Check freshness anchors before deciding reporting periods. + + Notes: + - Dispensing uses ProcessingPeriodDate and usually lags. + - Unified prescribing DateMedicationStart can include future starts, so the + TPP DateEventRecorded probe is included as a more conservative source + event freshness marker. 
+*/ + +SELECT + 'NATIONAL.GPMED.MedicinesDispensedInPrimarycare' AS "Source", + MAX("ProcessingPeriodDate")::DATE AS "LatestDate" +FROM NATIONAL.GPMED."MedicinesDispensedInPrimarycare" + +UNION ALL + +SELECT + 'REPORTING_DATASETS_ICB.SCRATCHPAD.MEDS__UnifiedPrescribingTable DateMedicationStart' AS "Source", + MAX("DateMedicationStart")::DATE AS "LatestDate" +FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" +WHERE "DateMedicationStart" <= CURRENT_DATE() + +UNION ALL + +SELECT + 'PRIMARY_CARE.TPP.SRPrimaryCareMedication DateEventRecorded' AS "Source", + MAX(CAST("DateEventRecorded" AS DATE)) AS "LatestDate" +FROM PRIMARY_CARE.TPP."SRPrimaryCareMedication" +WHERE "DateEventRecorded" >= DATEADD('MONTH', -3, CURRENT_DATE()) +ORDER BY "Source"; + diff --git a/04_rolling_and_pqs/rolling_period_generator.sql b/04_rolling_and_pqs/rolling_period_generator.sql new file mode 100644 index 0000000..42a275f --- /dev/null +++ b/04_rolling_and_pqs/rolling_period_generator.sql @@ -0,0 +1,39 @@ +/* + Rolling period generator + ======================== + + Purpose: + Generate reusable rolling reporting windows for monthly outputs. + + Use this first when a measure needs: + - one row per practice per period; + - a rolling lookback window; + - stable PeriodStartDate and PeriodEndDate columns for Excel or Power BI. +*/ + +SET FIRST_PERIOD_END_DATE = '2025-06-30'; +SET LAST_PERIOD_END_DATE = LAST_DAY(DATEADD('MONTH', -1, CURRENT_DATE())); +SET LOOKBACK_MONTHS = 12; + +WITH RECURSIVE date_periods AS ( + -- Month-end series; cap LAST_PERIOD_END_DATE before sharing a final report. + SELECT $FIRST_PERIOD_END_DATE::DATE AS "PeriodEndDate" + UNION ALL + SELECT LAST_DAY(DATEADD(MONTH, 1, "PeriodEndDate"))::DATE AS "PeriodEndDate" + FROM date_periods + WHERE "PeriodEndDate" < $LAST_PERIOD_END_DATE::DATE +), +rolling_periods AS ( + -- Converts each period end into a fixed lookback window. 
+ SELECT + DATEADD(MONTH, 1 - $LOOKBACK_MONTHS, DATE_TRUNC('MONTH', "PeriodEndDate"))::DATE AS "PeriodStartDate", + "PeriodEndDate", + $LOOKBACK_MONTHS AS "LookbackMonths" + FROM date_periods +) +SELECT + "PeriodStartDate", + "PeriodEndDate", + "LookbackMonths" +FROM rolling_periods +ORDER BY "PeriodEndDate"; diff --git a/05_audit_detail/human_readable_prescribing_detail.sql b/05_audit_detail/human_readable_prescribing_detail.sql new file mode 100644 index 0000000..8b7dc82 --- /dev/null +++ b/05_audit_detail/human_readable_prescribing_detail.sql @@ -0,0 +1,74 @@ +/* + Human-readable prescribing detail + ================================= + + Purpose: + Return row-level prescribing detail with medicine and organisation codes + replaced by useful names. + + Use this after an aggregate measure finds a cohort that needs checking. + Keep the output explicit. Avoid SELECT rx.* in shared audit extracts. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; +SET BNF_PREFIX = '0501'; + +WITH practices AS ( + -- Default reporting geography: active Norfolk and Suffolk parent GP practices. + SELECT + "OrganisationCode", + MIN("OrganisationName") AS "OrganisationName", + MIN("PCNName") AS "PCNName", + MIN("PlaceName") AS "PlaceName", + MIN("AllianceName") AS "AllianceName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" + GROUP BY "OrganisationCode" +), +organisations AS ( + -- Broader organisation lookup for prescribers/providers that are not GP practices. 
+ SELECT + "OrganisationCode", + MIN("OrganisationName") AS "OrganisationName" + FROM DATA_HUB.DWH."DimOrganisationAndSite" + GROUP BY "OrganisationCode" +) +SELECT + prescribing_org."OrganisationName" AS "PrescribingOrganisationName", + registered_gp."OrganisationName" AS "RegisteredPracticeName", + registered_gp."PCNName", + registered_gp."PlaceName", + registered_gp."AllianceName", + rx."PersonKey", + med."ProductDescription" AS "ProductName", + med."TherapeuticMoietyName", + med."BNFCode", + rx."DateMedicationStart", + rx."Name" AS "SourceProductName", + rx."Directions", + rx."Quantity", + rx."QuantityUnit", + rx."IsRepeatPrescription", + rx."MedicinalControlledDrugStatus", + rx."EstPrice", + rx."SourceSystem", + rx."DataHubCreatedDate", + rx."SourceRowKey" +FROM REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" rx +INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" med + ON med."ProductSnomedCode" = rx."SNOMEDCode" + AND med."BNFCode" LIKE $BNF_PREFIX || '%' +INNER JOIN practices registered_gp + ON registered_gp."OrganisationCode" = rx."CurrentGeneralPractice" +LEFT JOIN organisations prescribing_org + ON prescribing_org."OrganisationCode" = rx."OrgCode" +WHERE rx."DateMedicationStart" >= $START_DATE::DATE AND rx."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) + AND rx."PersonKey" IS NOT NULL +ORDER BY + registered_gp."OrganisationName", + rx."PersonKey", + rx."DateMedicationStart"; diff --git a/06_advanced_methods/product_price_and_quantity_parsing_template.sql b/06_advanced_methods/product_price_and_quantity_parsing_template.sql new file mode 100644 index 0000000..5e9aba5 --- /dev/null +++ b/06_advanced_methods/product_price_and_quantity_parsing_template.sql @@ -0,0 +1,144 @@ +/* + Product price and quantity parsing template + =========================================== + + Purpose: + Reusable reference CTEs for estimating cost when working from raw EMIS or + TPP prescribing extracts rather than the maintained unified prescribing + table. 
+ + Prefer REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable" + where possible. Use this file when you need to understand or rebuild the + price/quantity logic. +*/ + +SET START_DATE = '2025-04-01'; +SET END_DATE = '2026-03-31'; + +WITH pack_level_data AS ( + -- Pack-level records carry the most direct tariff/indicative unit prices. + SELECT + "ProductSnomedCode", + "ParentPresentationSnomedCode", + "MedicinalLatestSnomedCode", + "ProductLevel", + "PackUnitDescription", + COALESCE( + "DrugTariffPricePerUnit", + "IndicativePricePerUnit", + CASE WHEN "AnnualQuantity" > 0 THEN "AnnualCost" / "AnnualQuantity" END + ) / CASE WHEN LOWER("PackUnitDescription") IN ('litre', 'litres') THEN 1000 ELSE 1 END AS "PricePerUnit" + FROM DATA_HUB.DWH."DimMedicineAndDevice" + WHERE "ProductLevel" IN ('VMPP', 'AMPP') +), +vmp_family_prices AS ( + -- Fallback average where a pack has no direct price but belongs to a VMP family. + SELECT + "MedicinalLatestSnomedCode" AS "VmpCode", + AVG("PricePerUnit") AS "PricePerUnit" + FROM pack_level_data + WHERE "PricePerUnit" IS NOT NULL + GROUP BY "MedicinalLatestSnomedCode" +), +product_prices AS ( + -- Returns one product-to-unit-price lookup across pack and presentation levels. 
+ SELECT "ProductSnomedCode", "PricePerUnit" + FROM pack_level_data + WHERE "PricePerUnit" IS NOT NULL + + UNION ALL + + SELECT + pld."ProductSnomedCode", + vfp."PricePerUnit" + FROM pack_level_data pld + INNER JOIN vmp_family_prices vfp + ON pld."MedicinalLatestSnomedCode" = vfp."VmpCode" + WHERE pld."PricePerUnit" IS NULL + + UNION ALL + + SELECT + parent."ProductSnomedCode", + COALESCE(direct_avg."PricePerUnit", vfp."PricePerUnit") AS "PricePerUnit" + FROM DATA_HUB.DWH."DimMedicineAndDevice" parent + LEFT JOIN ( + SELECT + "ParentPresentationSnomedCode" AS "ParentCode", + AVG("PricePerUnit") AS "PricePerUnit" + FROM pack_level_data + WHERE "PricePerUnit" IS NOT NULL + GROUP BY "ParentPresentationSnomedCode" + ) direct_avg + ON direct_avg."ParentCode" = parent."ProductSnomedCode" + LEFT JOIN vmp_family_prices vfp + ON parent."MedicinalLatestSnomedCode" = vfp."VmpCode" + WHERE parent."ProductLevel" IN ('VMP', 'AMP') + AND COALESCE(direct_avg."PricePerUnit", vfp."PricePerUnit") IS NOT NULL +), +tpp_quantity_parsed AS ( + -- Parse common TPP free-text quantity patterns before estimating cost. 
+ SELECT + med."IDPatient", + med."IDOrganisation", + med."IDMultiLexDMD" AS "ProductSnomedCode", + CAST(med."DateMedicationStart" AS DATE) AS "DateMedicationStart", + med."MedicationQuantity" AS "SourceQuantityText", + COALESCE( + CASE + WHEN REGEXP_SUBSTR(LOWER(med."MedicationQuantity"), '([0-9]+)\\s*packs?\\s+of\\s+([0-9]+)', 1, 1, 'e', 1) IS NOT NULL + THEN TRY_CAST(REGEXP_SUBSTR(LOWER(med."MedicationQuantity"), '([0-9]+)\\s*packs?\\s+of', 1, 1, 'e', 1) AS FLOAT) + * TRY_CAST(REGEXP_SUBSTR(LOWER(med."MedicationQuantity"), 'of\\s+([0-9]+)', 1, 1, 'e', 1) AS FLOAT) + END, + CASE + WHEN REGEXP_SUBSTR(med."MedicationQuantity", '^([0-9]+)\\s*\\*\\s*([0-9]+)', 1, 1, 'e', 1) IS NOT NULL + THEN TRY_CAST(REGEXP_SUBSTR(med."MedicationQuantity", '^([0-9]+)', 1, 1, 'e', 1) AS FLOAT) + * TRY_CAST(REGEXP_SUBSTR(med."MedicationQuantity", '\\*\\s*([0-9]+)', 1, 1, 'e', 1) AS FLOAT) + END, + CASE + WHEN REGEXP_SUBSTR(med."MedicationQuantity", '^1\\s*x\\s*([0-9]+\\.?[0-9]*)', 1, 1, 'e', 1) IS NOT NULL + THEN TRY_CAST(REGEXP_SUBSTR(med."MedicationQuantity", '^1\\s*x\\s*([0-9]+\\.?[0-9]*)', 1, 1, 'e', 1) AS FLOAT) + END, + CASE + WHEN REGEXP_SUBSTR(TRIM(med."MedicationQuantity"), '^[0-9]+\\.?[0-9]*$') IS NOT NULL + THEN TRY_CAST(TRIM(med."MedicationQuantity") AS FLOAT) + END, + CASE + WHEN REGEXP_SUBSTR(med."MedicationQuantity", '^([0-9]+\\.?[0-9]*)') IS NOT NULL + THEN TRY_CAST(REGEXP_SUBSTR(med."MedicationQuantity", '^([0-9]+\\.?[0-9]*)', 1, 1, 'e', 1) AS FLOAT) + END + ) AS "ParsedQuantity" + FROM PRIMARY_CARE.TPP."SRPrimaryCareMedication" med + WHERE med."DateMedicationStart" >= $START_DATE::DATE AND med."DateMedicationStart" < DATEADD(DAY, 1, $END_DATE::DATE) +), +example_tpp_output AS ( + -- Example consumer query; adapt this CTE for a specific medicine/cohort. 
+ SELECT + dp."PersonKey", + tpp."IDOrganisation" AS "OrgCode", + tpp."ProductSnomedCode", + tpp."DateMedicationStart", + tpp."SourceQuantityText", + tpp."ParsedQuantity", + pp."PricePerUnit", + ROUND(tpp."ParsedQuantity" * pp."PricePerUnit", 2) AS "EstimatedPrice" + FROM tpp_quantity_parsed tpp + INNER JOIN PRIMARY_CARE.TPP."SRPatient" pat + ON tpp."IDPatient" = pat."IDPatient" + INNER JOIN DATA_HUB.DWH."DimPerson" dp + ON pat."PatientPseudonym" = dp."PatientPseudonym" + LEFT JOIN product_prices pp + ON tpp."ProductSnomedCode" = pp."ProductSnomedCode" + WHERE pat."PatientPseudonym" IS NOT NULL +) +SELECT + "PersonKey", + "OrgCode", + "ProductSnomedCode", + "DateMedicationStart", + "SourceQuantityText", + "ParsedQuantity", + "PricePerUnit", + "EstimatedPrice" +FROM example_tpp_output +LIMIT 100; diff --git a/README.md b/README.md new file mode 100644 index 0000000..be71275 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# Snowflake Medicines Optimisation Query Templates + +Curated Snowflake SQL templates for medicines optimisation analysis. The aim is to give a new analyst a clean starting point without copying the whole working query folder, including old experiments and one-off scripts. + +The templates are written for analysts who already understand medicines datasets and reporting concepts, but who may not have pharmacy training. Clinical judgement, formulary interpretation, and patient-level action should still be checked with the relevant clinical lead. + +## Start Here + +1. Use `docs/data_sources_and_join_patterns.md` to understand the main tables and joins. +2. Use `01_medicine_lookups/medicine_reference_lookup.sql` to identify the right BNF, VTM, VMP, or product codes. +3. Read `docs/sql_style_and_validation_guardrails.md` when adapting a template for the first time. +4. Check `docs/template_validation_status.md` to see what was validated against Snowflake. +5. Pick the closest template folder below. +6. 
Replace the visible `SET` values and any `REPLACE_WITH...` placeholders. +7. Run a small test first, then check output columns before using the result in a report. + +## Folder Guide + +`00_copied_reference/` + +Original files copied from the working repo because they were already template-like. Use them as reference, not as the preferred starting point. + +`01_medicine_lookups/` + +Lookup and extraction templates for medicines by VTM, VMP, BNF, dispensing product, or patient pseudonym. + +`02_prescribing_analysis/` + +Practice-level prescribing summaries, high-prescribing quintiles, and patient-level cost ranking. + +`03_cohorts_and_clinical_coding/` + +Templates for combining prescribing exposure with clinical coding, maintained SNOMED clusters, and month/practice event grids. + +`04_rolling_and_pqs/` + +Rolling-period, long-format, baseline-vs-evaluation, and latest-data templates. These are useful for PQS-style reporting and repeatable monthly outputs. + +`05_audit_detail/` + +Row-level audit extract pattern with medicine and organisation names added. Use this after an aggregate query identifies records worth checking. + +`06_advanced_methods/` + +More technical reference logic, currently including raw TPP quantity parsing and product price lookup. Prefer maintained tables unless you need to rebuild or audit the method. + +`docs/` + +Plain-language notes on data sources, joins, common medicines-analysis terms, SQL guardrails, and the copied introductory SQL/Snowflake training programme. + +## Common Defaults + +For Norfolk/Suffolk practice reporting, templates use: + +```sql +WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +``` + +Change this deliberately if the report is Norfolk and Waveney only, branch-site level, provider-level, or national. 
+ +## Practical Checks Before Sharing Outputs + +- Confirm whether the question needs prescribing data or dispensing data. +- Check latest available dates before setting the reporting period. +- Keep denominator logic visible in the SQL. +- Use maintained coding clusters before text-searching SNOMED descriptions. +- Avoid `SELECT *` in shared audit extracts. +- Do not commit patient pseudonyms, row-level extracts, CSVs, or spreadsheets. + +## Template Sources + +The cleaned templates were derived from recurring patterns in the working query repo, including: + +- VTM/VMP prescribing and dispensing checks. +- Practice-level high-prescribing analysis. +- Prescribing plus clinical coding cohorts. +- SMR/frailty-style monthly practice grids. +- PQS rolling-period and long-format outputs. +- Human-readable audit extracts using medicine and organisation dimensions. diff --git a/docs/data_sources_and_join_patterns.md b/docs/data_sources_and_join_patterns.md new file mode 100644 index 0000000..80e6d4c --- /dev/null +++ b/docs/data_sources_and_join_patterns.md @@ -0,0 +1,81 @@ +# Data Sources And Join Patterns + +This repo assumes access to the Norfolk and Suffolk Snowflake environment used for medicines optimisation analysis. + +## Core Medicines Sources + +`REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable"` + +Use this for current prescribing analysis when available. It combines EMIS and TPP prescribing into a single shape with `PersonKey`, `SNOMEDCode`, `DateMedicationStart`, quantity, estimated price, source system, prescribing organisation, and current registered GP. + +`NATIONAL.GPMED."MedicinesDispensedInPrimarycare"` + +Use this for official dispensing activity. It is usually slower to refresh than prescribing but is better aligned to dispensing/payment concepts. Key columns include `ProcessingPeriodDate`, `PatientPseudonym`, `PaiddmdCode`, `PaidBNFCode`, `CostCentreODSCode`, `ItemCount`, `PaidQuantity`, and `TotalPaidGross`. 
+ +`DATA_HUB.DWH."DimMedicineAndDevice"` + +Use this as the medicine reference table. It links SNOMED product codes to BNF, VTM, VMP, product descriptions, routes, strengths, and indicative price fields. + +`DATA_HUB.DWH."DimOrganisationAndSite"` + +Use this for practice names and hierarchy columns such as PCN, Place, Alliance, and INT. For Norfolk/Suffolk practice reports, the common filter is: + +```sql +WHERE "OrganisationSubType" = 'GP Practice' + AND "IsSiteActive" = 'Yes' + AND "IsSiteNorfolkAndSuffolk" = 'Yes' + AND "SiteCode" = "OrganisationCode" +``` + +`DATA_HUB.DWH."DimPerson"` + +Use this for registered GP, age, demographic fields, and pseudonym-to-person links. For Suffolk-inclusive work, avoid old Norfolk-and-Waveney-only registration filters unless the report is explicitly Norfolk and Waveney only. + +`DATA_HUB.PHM."PrimaryCareClinicalCoding"` + +Use this for clinical coding events. Join on `PatientPseudonym`, then filter by `SNOMEDCode` and `EventDateTime`. + +`DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes"` + +Use maintained clinical coding clusters where possible. This is usually safer than searching SNOMED descriptions with text matching. + +## Common Joins + +Prescribing to medicine: + +```sql +INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" med + ON rx."SNOMEDCode" = med."ProductSnomedCode" +``` + +Prescribing to registered practice: + +```sql +INNER JOIN DATA_HUB.DWH."DimOrganisationAndSite" gp + ON rx."CurrentGeneralPractice" = gp."OrganisationCode" + AND gp."SiteCode" = gp."OrganisationCode" +``` + +Dispensing to medicine: + +```sql +INNER JOIN DATA_HUB.DWH."DimMedicineAndDevice" med + ON gpm."PaiddmdCode" = med."ProductSnomedCode" +``` + +Clinical coding to maintained cluster: + +```sql +INNER JOIN DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes" c + ON cc."SNOMEDCode" = c."SNOMEDCode" + AND c."Cluster_ID" = 'REPLACE_WITH_CLUSTER_ID' +``` + +## Choosing The Date Field + +Use `ProcessingPeriodDate` for dispensing. 
+ +Use `DateMedicationStart` for prescribing activity windows. + +Use `DateEventRecorded` as a conservative freshness marker for TPP prescribing extracts when checking whether the latest full month is safe to report. + diff --git a/docs/glossary_for_medicines_analysts.md b/docs/glossary_for_medicines_analysts.md new file mode 100644 index 0000000..8d1b57a --- /dev/null +++ b/docs/glossary_for_medicines_analysts.md @@ -0,0 +1,54 @@ +# Glossary For Medicines Analysts + +This is a practical glossary for analysts working with medicines data. It is not clinical guidance. + +## Medicine Coding + +`dm+d`: Dictionary of medicines and devices. The source of SNOMED product codes used for medicines. + +`SNOMEDCode`: A coded identifier. In medicines queries this is usually a dm+d product code. In clinical coding queries it is usually a clinical event code. + +`VTM`: Virtual Therapeutic Moiety. Broad ingredient-level grouping, useful when you want all products for a medicine substance. + +`VMP`: Virtual Medicinal Product. More specific product family, useful when strength or formulation matters. + +`AMP`: Actual Medicinal Product. Branded or supplier-specific product. + +`VMPP` / `AMPP`: Virtual Medicinal Product Pack / Actual Medicinal Product Pack. Pack-level products, i.e. a VMP or AMP in a specific pack size. + +`BNFCode`: British National Formulary hierarchy code. Useful for broad prescribing sections such as antibiotics or antidepressants. + +## Activity Sources + +`Prescribing`: Records from GP clinical systems showing prescriptions/issues. More current, but not the same as dispensed supply. + +`Dispensing`: NHS Business Services Authority (NHSBSA) GPMeds data showing items dispensed and paid. Better for payment-style reporting, usually with a lag. + +`ProcessingPeriodDate`: The month attached to dispensing data. + +`DateMedicationStart`: The prescribing date used in the unified prescribing table. + +## People And Organisations + +`PersonKey`: Internal person identifier used for linked analysis. Prefer this for patient counts where available. + +`PatientPseudonym`: Pseudonymised patient identifier. 
Needed for some joins to clinical coding and dispensing. + +`CurrentGeneralPractice`: Practice code attached to the patient or prescribing row. + +`OrganisationCode` / `SiteCode`: Organisation and site identifiers. For practice-level reporting, use the parent practice row where `SiteCode = OrganisationCode` unless branch-level reporting is intended. + +`PCN`, `Place`, `Alliance`, `INT`: Organisation hierarchy fields used for grouping practice outputs. + +## Analysis Terms + +`Cohort`: A defined set of patients meeting criteria. + +`Denominator`: The population used to calculate a rate, for example registered patients at a practice. + +`Numerator`: The count meeting the measure, for example patients prescribed a medicine. + +`Quintile`: One of five ranked groups. Quintile 5 is often used for highest prescribing when ranking from low to high. + +`Long format`: Output where each indicator is a row rather than a separate column. This is useful for appending measures and charting. + diff --git a/docs/sql_style_and_validation_guardrails.md b/docs/sql_style_and_validation_guardrails.md new file mode 100644 index 0000000..f5a8195 --- /dev/null +++ b/docs/sql_style_and_validation_guardrails.md @@ -0,0 +1,42 @@ +# SQL Style And Validation Guardrails + +These notes are extracted from repeated medicines optimisation query work. They are intended to prevent common Snowflake and medicines-data errors. + +## Snowflake Syntax + +- Double-quote mixed-case table and column identifiers such as `"DimPerson"`, `"PatientPseudonym"`, and `"ProcessingPeriodDate"`. Snowflake resolves unquoted identifiers as upper case, so an unquoted mixed-case name will not match the stored name. +- Quote aliases that will be consumed by Excel, Power BI, Python, or another SQL layer: `COUNT(*) AS "PatientCount"`. +- Use `CURRENT_DATE()` or `CURRENT_TIMESTAMP()` rather than T-SQL functions such as `GETDATE()`. +- Use `LIMIT` for quick checks. +- Cast long-format output values to a consistent numeric type before `UNION ALL`. 
+ +## Medicines Table Choices + +- Use `REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable"` for current prescribing analysis when available. +- Use `NATIONAL.GPMED."MedicinesDispensedInPrimarycare"` for official dispensed/paid activity. +- Do not treat prescribing and dispensing as interchangeable. They answer related but different questions. +- Map prescribing SNOMED codes to BNF through `DATA_HUB.DWH."DimMedicineAndDevice"`. +- Use `"ProductDescription"` for medicine names from `DimMedicineAndDevice`. Do not assume a `"ProductName"` column. + +## Geography And Denominators + +- Keep the practice CTE visible in each report. +- For Norfolk/Suffolk GP practice outputs, use `DATA_HUB.DWH."DimOrganisationAndSite"` with `"IsSiteNorfolkAndSuffolk" = 'Yes'`. +- Only use older Norfolk-and-Waveney flags when the report is explicitly Norfolk and Waveney only. +- Be explicit about whether you are using organisation registered population, a counted active patient denominator, or another denominator. + +## Clinical Coding + +- Prefer maintained clusters in `DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes"` over text searching SNOMED descriptions. +- Use `DATA_HUB.PHM."PrimaryCareClinicalCoding"` for unified clinical coding across systems. +- Join clinical coding to patients through `PatientPseudonym`, then to `DimPerson` when person or practice fields are needed. +- Make lookback windows visible in `SET` variables or a date CTE. + +## Validation Before Sharing + +- Check latest data dates before selecting a period. +- Run a limited version first and inspect row counts. +- Check the output column list before handing results to someone else. +- Avoid `SELECT *` in audit/detail outputs. +- Do not commit patient pseudonyms, CSV exports, spreadsheets, images, or local tooling files. 
+ diff --git a/docs/template_validation_status.md b/docs/template_validation_status.md new file mode 100644 index 0000000..e0a11b8 --- /dev/null +++ b/docs/template_validation_status.md @@ -0,0 +1,62 @@ +# Template Validation Status + +Last reviewed: 2026-05-12 + +## Scope + +The reusable SQL templates in folders `01_medicine_lookups` through +`06_advanced_methods` were checked with the Snowflake MCP (Model Context Protocol) server against live table +metadata and representative parameter values. Templates that use `SET` +variables were validated by substituting safe example literals into the final +`SELECT` statement before running `describe_query`. + +The `00_copied_reference` folder and `docs/training_intro_snowflake_sql` are +historic/reference material. They are retained for learning and comparison, +not as the preferred starting point for new analysis. + +## Live Metadata Checked + +- `REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable"` +- `NATIONAL.GPMED."MedicinesDispensedInPrimarycare"` +- `DATA_HUB.DWH."DimMedicineAndDevice"` +- `DATA_HUB.DWH."DimOrganisationAndSite"` +- `DATA_HUB.DWH."DimPerson"` +- `DATA_HUB.PHM."ClinicalCodingClusterSnomedCodes"` +- `DATA_HUB.PHM."PrimaryCareClinicalCoding"` +- `PRIMARY_CARE.TPP."SRPrimaryCareMedication"` +- `PRIMARY_CARE.TPP."SRPatient"` + +## Query Compilation Checks + +All files below passed Snowflake MCP `describe_query` validation. This is a compile check against current table metadata: it confirms the referenced tables and columns resolve, not that the query was executed or that its results are correct: + +- `01_medicine_lookups/medicine_reference_lookup.sql` +- `01_medicine_lookups/prescribing_by_vtm.sql` +- `01_medicine_lookups/prescribing_by_vmp.sql` +- `01_medicine_lookups/dispensing_by_vtm_or_vmp.sql` +- `01_medicine_lookups/prescribing_for_patient_pseudonym.sql` +- `02_prescribing_analysis/practice_level_bnf_prescribing_summary.sql` +- `02_prescribing_analysis/high_prescribing_practices_quintile_template.sql` +- `02_prescribing_analysis/prescribing_spend_by_patient_template.sql` +- `03_cohorts_and_clinical_coding/cluster_code_lookup.sql` +- 
`03_cohorts_and_clinical_coding/monthly_clinical_event_count_by_practice.sql` +- `03_cohorts_and_clinical_coding/prescribing_plus_clinical_code_cohort.sql` +- `04_rolling_and_pqs/rolling_period_generator.sql` +- `04_rolling_and_pqs/latest_data_dates.sql` +- `04_rolling_and_pqs/baseline_vs_evaluation_template.sql` +- `04_rolling_and_pqs/dual_source_long_format_measure_template.sql` +- `05_audit_detail/human_readable_prescribing_detail.sql` +- `06_advanced_methods/product_price_and_quantity_parsing_template.sql` + +## Freshness Probe + +The `latest_data_dates.sql` query was also executed with Snowflake MCP. At the +time of review it returned: + +- `NATIONAL.GPMED."MedicinesDispensedInPrimarycare"` latest + `ProcessingPeriodDate`: `2025-07-01` +- `PRIMARY_CARE.TPP."SRPrimaryCareMedication"` latest `DateEventRecorded` + within the recent-window probe: `2026-03-28` +- `REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable"` latest + non-future `DateMedicationStart`: `2026-05-12` +