diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md index 9877277..6ab26c1 100644 --- a/IMPLEMENTATION_PLAN.md +++ b/IMPLEMENTATION_PLAN.md @@ -107,12 +107,12 @@ Drawer selection → update_drug_selection → app-state store → load_pathway_ - **Checkpoint**: `python -c "from dash_app.data.queries import load_initial_data; print(load_initial_data())"` returns valid data ### 1.2 Build directorate card tree from DimSearchTerm.csv -- [ ] Create `dash_app/data/card_browser.py` with: +- [x] Create `dash_app/data/card_browser.py` with: - `build_directorate_tree()` → dict structured as `{PrimaryDirectorate: {Search_Term: [drug_fragment, ...]}}` - Loads `data/DimSearchTerm.csv`, groups by PrimaryDirectorate → Search_Term → split CleanedDrugName by pipe - Applies SEARCH_TERM_MERGE_MAP from `data_processing.diagnosis_lookup` (merge asthma variants) - `get_all_drugs()` → sorted flat list of all unique drug labels from `pathway_nodes` level 3 -- **Checkpoint**: `python -c "from dash_app.data.card_browser import build_directorate_tree; import json; print(json.dumps(build_directorate_tree(), indent=2))"` returns valid tree +- **Checkpoint**: `python -c "from dash_app.data.card_browser import build_directorate_tree; import json; print(json.dumps(build_directorate_tree(), indent=2))"` returns valid tree ✓ --- diff --git a/dash_app/data/card_browser.py b/dash_app/data/card_browser.py new file mode 100644 index 0000000..4f23800 --- /dev/null +++ b/dash_app/data/card_browser.py @@ -0,0 +1,83 @@ +""" +Directorate card tree builder for the drug browser drawer. + +Loads DimSearchTerm.csv and builds a nested structure: + {PrimaryDirectorate: {Search_Term: [drug_fragment, ...]}} + +Also provides get_all_drugs() for the flat "All Drugs" card. 
+""" + +import csv +from collections import defaultdict +from pathlib import Path + +from data_processing.diagnosis_lookup import SEARCH_TERM_MERGE_MAP + +DATA_DIR = Path(__file__).resolve().parents[2] / "data" +DIM_SEARCH_TERM_PATH = DATA_DIR / "DimSearchTerm.csv" + + +def build_directorate_tree() -> dict[str, dict[str, list[str]]]: + """ + Build a nested dict from DimSearchTerm.csv grouped by directorate. + + Returns: + { + "CARDIOLOGY": { + "acute coronary syndrome": ["ABCIXIMAB", "CLOPIDOGREL", ...], + "atrial fibrillation": ["APIXABAN", "DABIGATRAN", ...], + ... + }, + "CLINICAL HAEMATOLOGY": { ... }, + ... + } + + Search_Term values are normalized via SEARCH_TERM_MERGE_MAP + (e.g. "allergic asthma" → "asthma"). Drug fragments within + merged terms are combined and deduplicated. + """ + # directorate → search_term → set of drug fragments + tree: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set)) + + with open(DIM_SEARCH_TERM_PATH, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + search_term = (row.get("Search_Term") or "").strip().lower() + drug_names_raw = row.get("CleanedDrugName") or "" + directorate = (row.get("PrimaryDirectorate") or "").strip().upper() + + if not search_term or not directorate: + continue + + # Apply merge map (e.g. "allergic asthma" → "asthma") + search_term = SEARCH_TERM_MERGE_MAP.get(search_term, search_term) + + fragments = [ + frag.strip().upper() + for frag in drug_names_raw.split("|") + if frag.strip() + ] + + tree[directorate][search_term].update(fragments) + + # Convert sets → sorted lists and sort at every level + result: dict[str, dict[str, list[str]]] = {} + for directorate in sorted(tree): + result[directorate] = { + term: sorted(tree[directorate][term]) + for term in sorted(tree[directorate]) + } + + return result + + +def get_all_drugs() -> list[str]: + """ + Return a sorted flat list of all unique drug labels from pathway_nodes level 3. 
+ + Delegates to load_initial_data() which already queries the database. + """ + from dash_app.data.queries import load_initial_data + + data = load_initial_data() + return data.get("available_drugs", []) diff --git a/progress.txt b/progress.txt index 2702249..33d62e9 100644 --- a/progress.txt +++ b/progress.txt @@ -132,7 +132,7 @@ Migrating the HCD Analysis frontend from Reflex to Dash (Plotly) + Dash Mantine - `src/data_processing/pathway_queries.py` — NEW: shared query functions - `dash_app/data/queries.py` — NEW: thin Dash wrapper with DB_PATH resolution - `IMPLEMENTATION_PLAN.md` — Task 1.1 marked [x] -### Committed: (pending) +### Committed: b71748f "feat: add shared pathway query functions for Dash data access (Task 1.1)" ### Patterns discovered: - `src/` is on sys.path only when using `uv run` (via .pth file created by setup_dev.py). Running `python` directly won't find `data_processing` module. Always use `uv run python` for testing. - `total_records` from `pathway_refresh_log` returns 0 — the refresh log's `source_row_count` field appears empty despite `completed_at` having a value. This is cosmetic — the KPI can use `unique_patients` from chart-data instead. 
@@ -145,3 +145,44 @@ Migrating the HCD Analysis frontend from Reflex to Dash (Plotly) + Dash Mantine - Remember: drug fragments in CleanedDrugName are UPPERCASE substrings, not exact matches ### Blocked items: - None + +## Iteration 3 — 2026-02-06 +### Task: Phase 1 — Task 1.2 (Build directorate card tree from DimSearchTerm.csv) +### Why this task: +- Phase 1 data access layer needs both query functions (done in 1.1) AND the card browser data (this task) +- Progress.txt from iteration 2 explicitly recommended this task +- The drawer component (Phase 4) depends on this data structure +### Status: COMPLETE +### What was done: +- Created `dash_app/data/card_browser.py` with two functions: + - `build_directorate_tree()` — reads DimSearchTerm.csv, applies SEARCH_TERM_MERGE_MAP, returns nested dict: {PrimaryDirectorate: {Search_Term: [drug_fragment, ...]}} + - `get_all_drugs()` — delegates to `load_initial_data()` from queries.py (no SQL duplication) +- SEARCH_TERM_MERGE_MAP correctly imported from `data_processing.diagnosis_lookup` +- Asthma variants ("allergic asthma", "severe persistent allergic asthma") merge into single "asthma" entry with combined/deduplicated drug fragments +- Tree uses sets during construction to deduplicate drug fragments from merged terms, then converts to sorted lists +### Validation results: +- Tier 1 (Code): `from dash_app.data.card_browser import build_directorate_tree` — OK +- Tier 1 (App starts): `from dash_app.app import app` — OK, layout type is MantineProvider +- Tier 3 (Functional): + - `build_directorate_tree()`: 19 directorates, correctly nested dict + - CARDIOLOGY has 10 indications, CLINICAL HAEMATOLOGY has many more + - Asthma merge verified: only "asthma" present (not "allergic asthma" or "severe persistent allergic asthma"), OMALIZUMAB included in merged drug list + - `get_all_drugs()`: 42 drugs from pathway_nodes level 3 +### Files changed: +- `dash_app/data/card_browser.py` — NEW: directorate tree builder + drug list accessor 
+- `IMPLEMENTATION_PLAN.md` — Task 1.2 marked [x] +### Committed: [pending] +### Patterns discovered: +- `get_all_drugs()` imports `load_initial_data` lazily, inside the function body (`from dash_app.data.queries import load_initial_data`), to avoid circular imports between the two modules in `dash_app/data/` +- Drug fragments in DimSearchTerm.csv are already UPPERCASE, but `.upper()` is applied defensively +- "diabetes" appears twice in DimSearchTerm.csv (DIABETIC MEDICINE and OPHTHALMOLOGY) — this is handled naturally by the directorate grouping (same search_term under two different directorates) +### Next iteration should: +- Start Phase 2: Task 2.1 — Header + sidebar components +- Read `01_nhs_classic.html` to get the exact HTML structure, CSS classes, and SVG icons +- Create `dash_app/components/header.py` with `make_header()` function +- Create `dash_app/components/sidebar.py` with `make_sidebar()` function +- Use CSS classes from nhs.css (`.top-header`, `.sidebar`, etc.) — NOT inline styles +- Header needs placeholder `html.Span` with IDs for callback updates: `id="header-record-count"`, `id="header-last-updated"` +- Sidebar needs click targets for drawer open (Drug Selection, Indications items) +### Blocked items: +- None