initial commit

This commit is contained in:
Andrew Charlwood
2026-05-12 16:40:03 +01:00
commit 647d1bfa7f
38 changed files with 2715 additions and 0 deletions
@@ -0,0 +1,144 @@
/*
Product price and quantity parsing template
===========================================
Purpose:
Reusable reference CTEs for estimating cost when working from raw EMIS or
TPP prescribing extracts rather than the maintained unified prescribing
table.
Prefer REPORTING_DATASETS_ICB.SCRATCHPAD."MEDS__UnifiedPrescribingTable"
where possible. Use this file when you need to understand or rebuild the
price/quantity logic.
*/
-- Date window for the query below, held as session variables and referenced
-- as $START_DATE / $END_DATE in the TPP medication start-date filter.
SET (START_DATE, END_DATE) = ('2025-04-01', '2026-03-31');
WITH pack_level_data AS (
    -- One row per pack-level (VMPP/AMPP) record of the medicine dimension,
    -- each with a single derived "PricePerUnit".
    -- Price precedence: "DrugTariffPricePerUnit", then "IndicativePricePerUnit",
    -- then "AnnualCost" / "AnnualQuantity" (only when the quantity is positive).
    -- "PricePerUnit" is NULL when none of the three is available.
    SELECT
        dim."ProductSnomedCode",
        dim."ParentPresentationSnomedCode",
        dim."MedicinalLatestSnomedCode",
        dim."ProductLevel",
        dim."PackUnitDescription",
        COALESCE(
            dim."DrugTariffPricePerUnit",
            dim."IndicativePricePerUnit",
            CASE
                WHEN dim."AnnualQuantity" > 0
                    THEN dim."AnnualCost" / dim."AnnualQuantity"
            END
        )
        -- Packs described in litres have their price divided by 1000
        -- (presumably litre -> millilitre; confirm against the dimension's units).
        / CASE
            WHEN LOWER(dim."PackUnitDescription") IN ('litre', 'litres') THEN 1000
            ELSE 1
        END AS "PricePerUnit"
    FROM DATA_HUB.DWH."DimMedicineAndDevice" AS dim
    WHERE dim."ProductLevel" IN ('VMPP', 'AMPP')
),
vmp_family_prices AS (
    -- Mean unit price of the priced packs sharing a "MedicinalLatestSnomedCode",
    -- exposed as "VmpCode". Serves as the fallback price for products that have
    -- no direct price of their own.
    SELECT
        packs."MedicinalLatestSnomedCode" AS "VmpCode",
        AVG(packs."PricePerUnit") AS "PricePerUnit"
    FROM pack_level_data AS packs
    WHERE packs."PricePerUnit" IS NOT NULL
    GROUP BY packs."MedicinalLatestSnomedCode"
),
presentation_pack_prices AS (
    -- Mean unit price of the priced packs under each parent presentation code.
    SELECT
        packs."ParentPresentationSnomedCode" AS "ParentCode",
        AVG(packs."PricePerUnit") AS "PricePerUnit"
    FROM pack_level_data AS packs
    WHERE packs."PricePerUnit" IS NOT NULL
    GROUP BY packs."ParentPresentationSnomedCode"
),
product_prices AS (
    -- Product-code -> unit-price lookup covering pack and presentation levels.
    -- NOTE(review): a single row per "ProductSnomedCode" is only guaranteed if the
    -- dimension itself holds one row per product code - confirm before joining.

    -- 1. Packs that carry their own price.
    SELECT
        priced."ProductSnomedCode",
        priced."PricePerUnit"
    FROM pack_level_data AS priced
    WHERE priced."PricePerUnit" IS NOT NULL

    UNION ALL

    -- 2. Packs without a price: fall back to the VMP-family average.
    SELECT
        unpriced."ProductSnomedCode",
        fam."PricePerUnit"
    FROM pack_level_data AS unpriced
    INNER JOIN vmp_family_prices AS fam
        ON fam."VmpCode" = unpriced."MedicinalLatestSnomedCode"
    WHERE unpriced."PricePerUnit" IS NULL

    UNION ALL

    -- 3. Presentation-level (VMP/AMP) products: average of their own child packs,
    --    else the VMP-family average; rows with neither are left out.
    SELECT
        pres."ProductSnomedCode",
        COALESCE(pack_avg."PricePerUnit", fam."PricePerUnit") AS "PricePerUnit"
    FROM DATA_HUB.DWH."DimMedicineAndDevice" AS pres
    LEFT JOIN presentation_pack_prices AS pack_avg
        ON pack_avg."ParentCode" = pres."ProductSnomedCode"
    LEFT JOIN vmp_family_prices AS fam
        ON fam."VmpCode" = pres."MedicinalLatestSnomedCode"
    WHERE pres."ProductLevel" IN ('VMP', 'AMP')
        AND (
            pack_avg."PricePerUnit" IS NOT NULL
            OR fam."PricePerUnit" IS NOT NULL
        )
),
tpp_quantity_parsed AS (
    -- Parse common TPP free-text quantity patterns before estimating cost.
    --
    -- Output: one row per TPP medication record in the date window, with the raw
    -- quantity text ("SourceQuantityText") and a numeric "ParsedQuantity"
    -- (NULL when no pattern matches).
    --
    -- Every pattern runs against the same trimmed, lower-cased text so that
    -- leading spaces and letter case ("1 X 100") cannot push a value down to a
    -- weaker rule. Patterns are tried in this order; the first non-NULL wins:
    --   1. "<n> pack(s) of <m>"  -> n * m   (whole-number factors only)
    --   2. "<n> * <m>" at start  -> n * m   (whole-number factors only)
    --   3. "1 x <m>" at start    -> m       (m may be decimal)
    --   4. leading number        -> that number (also covers purely numeric text)
    SELECT
        src."IDPatient",
        src."IDOrganisation",
        src."IDMultiLexDMD" AS "ProductSnomedCode",
        CAST(src."DateMedicationStart" AS DATE) AS "DateMedicationStart",
        src."MedicationQuantity" AS "SourceQuantityText",
        COALESCE(
            -- 1. Both factors come from capture groups of the SAME match, so an
            --    unrelated earlier "of <n>" in the text cannot supply the pack size.
            TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '([0-9]+)\\s*packs?\\s+of\\s+([0-9]+)', 1, 1, 'e', 1) AS FLOAT)
                * TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '([0-9]+)\\s*packs?\\s+of\\s+([0-9]+)', 1, 1, 'e', 2) AS FLOAT),
            -- 2. "<n> * <m>" anchored at the start of the text.
            TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '^([0-9]+)\\s*\\*\\s*([0-9]+)', 1, 1, 'e', 1) AS FLOAT)
                * TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '^([0-9]+)\\s*\\*\\s*([0-9]+)', 1, 1, 'e', 2) AS FLOAT),
            -- 3. "1 x <m>": only a multiplier of exactly 1 is recognised here.
            TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '^1\\s*x\\s*([0-9]+\\.?[0-9]*)', 1, 1, 'e', 1) AS FLOAT),
            -- 4. Leading number, e.g. "28 tablets" or a bare "28".
            TRY_CAST(REGEXP_SUBSTR(src."QuantityText", '^([0-9]+\\.?[0-9]*)', 1, 1, 'e', 1) AS FLOAT)
        ) AS "ParsedQuantity"
    FROM (
        SELECT
            med."IDPatient",
            med."IDOrganisation",
            med."IDMultiLexDMD",
            med."DateMedicationStart",
            med."MedicationQuantity",
            -- Normalised copy used for matching only; the raw text is kept for output.
            LOWER(TRIM(med."MedicationQuantity")) AS "QuantityText"
        FROM PRIMARY_CARE.TPP."SRPrimaryCareMedication" AS med
        -- Half-open range: includes every moment of $END_DATE even if the column
        -- carries a time component (BETWEEN would stop at midnight of that day).
        WHERE med."DateMedicationStart" >= TO_DATE($START_DATE)
            AND med."DateMedicationStart" < DATEADD(day, 1, TO_DATE($END_DATE))
    ) AS src
),
example_tpp_output AS (
    -- Example consumer query; adapt this CTE for a specific medicine/cohort.
    -- Links each parsed TPP medication row to a person key through the patient
    -- pseudonym and prices it as ParsedQuantity * PricePerUnit, rounded to 2 dp.
    -- Rows with no price match are kept (LEFT JOIN), so their "PricePerUnit"
    -- and "EstimatedPrice" are NULL.
    SELECT
        person."PersonKey",
        rx."IDOrganisation" AS "OrgCode",
        rx."ProductSnomedCode",
        rx."DateMedicationStart",
        rx."SourceQuantityText",
        rx."ParsedQuantity",
        price."PricePerUnit",
        ROUND(rx."ParsedQuantity" * price."PricePerUnit", 2) AS "EstimatedPrice"
    FROM tpp_quantity_parsed AS rx
    INNER JOIN PRIMARY_CARE.TPP."SRPatient" AS patient
        ON patient."IDPatient" = rx."IDPatient"
        AND patient."PatientPseudonym" IS NOT NULL
    INNER JOIN DATA_HUB.DWH."DimPerson" AS person
        ON person."PatientPseudonym" = patient."PatientPseudonym"
    LEFT JOIN product_prices AS price
        ON price."ProductSnomedCode" = rx."ProductSnomedCode"
)
-- Preview of the example output. There is no ORDER BY, so which 100 rows are
-- returned is not deterministic.
SELECT
    preview."PersonKey",
    preview."OrgCode",
    preview."ProductSnomedCode",
    preview."DateMedicationStart",
    preview."SourceQuantityText",
    preview."ParsedQuantity",
    preview."PricePerUnit",
    preview."EstimatedPrice"
FROM example_tpp_output AS preview
LIMIT 100;