From fe2d048a21c962eec65bfddb9ab8a4313e162c46 Mon Sep 17 00:00:00 2001 From: Andrew Charlwood Date: Fri, 6 Feb 2026 19:13:19 +0000 Subject: [PATCH] feat: add parsing utilities and 8-tab chart infrastructure (Task 9.1) - Create src/data_processing/parsing.py with parse_average_spacing(), parse_pathway_drugs(), and calculate_retention_rate() - Add 8-tab bar to chart_card.py (Icicle, Market Share, Cost Effectiveness, Cost Waterfall, Sankey, Dosing, Heatmap, Duration) - Add active-tab dcc.Store and tab switching callback in chart.py - Remove Chart Views section from sidebar (now in tab bar) - Lazy rendering: only active tab's chart is computed --- IMPLEMENTATION_PLAN.md | 138 ++++++++++++++++++++++++++++++ dash_app/app.py | 1 + dash_app/callbacks/chart.py | 67 +++++++++++++-- dash_app/components/chart_card.py | 39 ++++++++- dash_app/components/sidebar.py | 17 +--- src/data_processing/parsing.py | 117 +++++++++++++++++++++++++ 6 files changed, 351 insertions(+), 28 deletions(-) create mode 100644 src/data_processing/parsing.py diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md index 2ca4a7c..8eb7fa6 100644 --- a/IMPLEMENTATION_PLAN.md +++ b/IMPLEMENTATION_PLAN.md @@ -326,6 +326,129 @@ Drawer selection → update_drug_selection → app-state store → load_pathway_ - [x] ensure filters update the KPI cards at the top to reflect the icicle chart visible --- +## Phase 9: Additional Analytics Charts + +### Design Approach +- Replace sidebar chart view selection with a **tab bar inside `chart_card.py`** +- Each tab renders its chart in the same `dcc.Graph` area +- Only the active tab's chart is computed (lazy rendering) +- Store the active tab in a dedicated `dcc.Store(id="active-tab")` (default: "icicle") +- All new charts respond to existing filters (date, chart type, trust, drug, directorate) +- New query functions go in `src/data_processing/pathway_queries.py` (shared, not in dash_app/) +- New parsing utilities go in `src/data_processing/pathway_queries.py` (or a new `parsing.py` if large) +- New
figure-building functions go in `src/visualization/` (shared, callable from Dash callbacks) +- New callback files in `dash_app/callbacks/` — one per chart type + +### 9.1 Parsing utilities + tab infrastructure +- [x] Create parsing utility functions (in new `src/data_processing/parsing.py`): + - `parse_average_spacing(spacing_html: str) -> list[dict]` — extract drug_name, dose_count, weekly_interval, total_weeks from HTML string + - `parse_pathway_drugs(ids: str, level: int) -> list[str]` — extract ordered drug list from ids column at level 4+ + - `calculate_retention_rate(nodes: list[dict]) -> dict` — for each N-drug pathway, calculate % not escalating to N+1 drugs +- [x] Update `dash_app/components/chart_card.py`: + - Add tab bar with 8 tabs: Icicle, Market Share, Cost Effectiveness, Cost Waterfall, Sankey, Dosing, Heatmap, Duration + - Plain HTML buttons with existing `.chart-tab` / `.chart-tab--active` CSS classes + - Single `dcc.Graph` shared across all tabs (lazy rendering) + - `active_tab` stored in separate `dcc.Store(id="active-tab")` +- [x] Update `dash_app/components/sidebar.py`: + - Remove "Chart Views" section (Icicle/Sankey/Timeline items) — chart selection moves to tab bar + - Keep "Overview" section with "Pathway Overview" +- [x] Update `dash_app/callbacks/chart.py`: + - Tab switching callback: 8 tab button Inputs → `active-tab` store + CSS class Outputs + - `update_chart` checks `active-tab` store and dispatches to correct figure builder + - Icicle renders normally; other tabs show "coming soon" placeholder +- **Checkpoint**: App starts, tab bar renders with all 8 tabs, icicle tab still works, other tabs show placeholder "Coming soon" messages ✓ + +### 9.2 Query functions for all chart types +- [ ] Add to `src/data_processing/pathway_queries.py`: + - `get_drug_market_share(db_path, date_filter_id, chart_type, directory=None, trust=None)` — Level 3 nodes grouped by directory, returning drug, value, colour + - `get_pathway_costs(db_path, 
date_filter_id, chart_type, directory=None)` — Level 4+ nodes with cost_pp_pa, parsed pathway labels, patient counts + - `get_cost_waterfall(db_path, date_filter_id, chart_type, trust=None)` — Level 2 nodes with cost_pp_pa per directorate/indication + - `get_drug_transitions(db_path, date_filter_id, chart_type, directory=None)` — Level 3+ nodes parsed into source→target drug transitions with patient counts + - `get_dosing_intervals(db_path, date_filter_id, chart_type, drug=None)` — Level 3 nodes for a specific drug, parsed average_spacing by trust/directory + - `get_drug_directory_matrix(db_path, date_filter_id, chart_type)` — Level 3 nodes pivoted as directory × drug with value/cost metrics + - `get_treatment_durations(db_path, date_filter_id, chart_type, directory=None)` — Level 3 nodes with avg_days by drug within a directorate +- [ ] Add thin wrappers in `dash_app/data/queries.py` for each new function (resolve DB_PATH and delegate) +- **Checkpoint**: All 7 query functions return correct data via manual Python tests (`python -c "..."`) + +### 9.3 First-Line Market Share chart (Tab 2) +- [ ] Create `dash_app/callbacks/market_share.py`: + - Build horizontal grouped bar chart from `get_drug_market_share()` data + - One cluster per directorate/indication (top N), bars within = drugs, length = % of patients + - Sorted by total patients desc, NHS blue palette + - Responds to all existing filters +- [ ] Create figure function in `src/visualization/` (e.g., `create_market_share_figure(data)`) +- [ ] Wire into tab switching in `update_chart` callback +- **Checkpoint**: Market Share tab renders real data, responds to filters, icicle still works + +### 9.4 Pathway Cost Effectiveness chart (Tab 3) +- [ ] Create `dash_app/callbacks/pathway_costs.py`: + - Build horizontal lollipop chart from `get_pathway_costs()` data + - Y-axis = pathway label (e.g., "Adalimumab → Secukinumab → Rituximab"), X-axis = £ per patient per annum + - Dot size = patient count, colour gradient: 
green (cheap) → amber → red (expensive) + - Uses `parse_pathway_drugs()` to extract pathway labels +- [ ] Add retention rate annotations using `calculate_retention_rate()` + - Show as secondary annotation: "Drug B retains 72% of patients" +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Cost Effectiveness tab renders with lollipop dots and retention annotations + +### 9.5 Cost Waterfall chart (Tab 4) +- [ ] Create `dash_app/callbacks/cost_waterfall.py`: + - Build Plotly waterfall chart from `get_cost_waterfall()` data + - Each bar = one directorate's average cost_pp_pa, sorted highest to lowest + - NHS colours, responds to chart_type toggle, date filter, trust filter +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Cost Waterfall tab renders real data, responds to filters + +### 9.6 Drug Switching Sankey chart (Tab 5) +- [ ] Create `dash_app/callbacks/sankey.py`: + - Build Plotly Sankey diagram from `get_drug_transitions()` data + - Left nodes = 1st-line drugs, middle = 2nd-line, right = 3rd-line + - Link width = patient count, colour by drug or directorate + - Uses `parse_pathway_drugs()` to extract drug transitions from `ids` column +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Sankey tab renders real drug transition flows + +### 9.7 Dosing Interval Comparison chart (Tab 6) +- [ ] Create `dash_app/callbacks/dosing.py`: + - Build horizontal grouped bar chart from `get_dosing_intervals()` data + - Uses `parse_average_spacing()` to extract weekly interval numbers + - Y-axis = trust or directorate, X-axis = weekly interval +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Dosing tab renders real data with parsed interval numbers + +### 9.8 Directorate × Drug Heatmap chart (Tab 7) +- [ ] Create `dash_app/callbacks/heatmap.py`: + - Build Plotly 
heatmap from `get_drug_directory_matrix()` data + - Rows = directorates (sorted by total patients), columns = drugs (sorted by frequency) + - Cell colour = patient count or cost, hover shows details + - Toggle between patient count / cost / cost_pp_pa colouring (additional control in tab) +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Heatmap tab renders matrix with correct colour mapping + +### 9.9 Treatment Duration chart (Tab 8) +- [ ] Create `dash_app/callbacks/duration.py`: + - Build horizontal bar chart from `get_treatment_durations()` data + - Y-axis = drug, X-axis = average days, colour intensity by patient count + - Directorate filter drives which drugs are shown +- [ ] Create figure function in `src/visualization/` +- [ ] Wire into tab switching +- **Checkpoint**: Duration tab renders real data, responds to directorate filter + +### 9.10 Final integration + polish +- [ ] Verify all 8 tabs switch smoothly with no unnecessary recomputation +- [ ] Verify each chart responds to filter changes (date, chart type, trust, directorate, drug) +- [ ] Test with both "directory" and "indication" chart types +- [ ] Verify icicle chart still works correctly (no regressions) +- [ ] Update CLAUDE.md with new chart types, callback files, and query functions +- **Checkpoint**: All tabs work, all filters work, no regressions, documentation updated + +--- + ## Completion Criteria All tasks marked `[x]` AND: @@ -343,6 +466,21 @@ All tasks marked `[x]` AND: - [x] Sidebar shows chart views (icicle/sankey/timeline), not filter triggers - [x] Filter bar has drug/trust/directorate trigger buttons with selection count badges +### Phase 9 Completion Criteria +- [ ] 8 chart tabs render in the chart card (Icicle + 7 new) +- [ ] Tab switching is smooth — only active tab's chart is computed +- [ ] All 7 new charts render real data from SQLite +- [ ] All charts respond to existing filters (date, chart type, trust, drug, directorate) 
+- [ ] Market Share shows grouped bars by directorate with drug breakdown +- [ ] Cost Effectiveness shows lollipop chart with retention annotations +- [ ] Cost Waterfall shows directorate cost_pp_pa bars +- [ ] Sankey shows drug switching flows across treatment lines +- [ ] Dosing shows parsed interval comparisons +- [ ] Heatmap shows directorate × drug matrix +- [ ] Treatment Duration shows avg_days bars +- [ ] Icicle chart has no regressions +- [ ] `python run_dash.py` starts cleanly with all tabs + --- ## Key Reference Files diff --git a/dash_app/app.py b/dash_app/app.py index 35eb1fe..e2947a2 100644 --- a/dash_app/app.py +++ b/dash_app/app.py @@ -29,6 +29,7 @@ app.layout = dmc.MantineProvider( }), dcc.Store(id="chart-data", storage_type="memory"), dcc.Store(id="reference-data", storage_type="session"), + dcc.Store(id="active-tab", storage_type="memory", data="icicle"), dcc.Location(id="url", refresh=False), # Page structure diff --git a/dash_app/callbacks/chart.py b/dash_app/callbacks/chart.py index bacd8e4..51312e0 100644 --- a/dash_app/callbacks/chart.py +++ b/dash_app/callbacks/chart.py @@ -1,11 +1,16 @@ -"""Callbacks for pathway data loading and icicle chart rendering.""" +"""Callbacks for tab switching, pathway data loading, and chart rendering.""" import logging -from dash import Input, Output, no_update +from dash import Input, Output, State, ctx, no_update import plotly.graph_objects as go +from dash_app.components.chart_card import TAB_DEFINITIONS + log = logging.getLogger(__name__) +# Tab IDs for callback inputs +_TAB_IDS = [f"tab-{tab_id}" for tab_id, _ in TAB_DEFINITIONS] + def _empty_figure(message): """Return a blank Plotly figure with a centered message annotation.""" @@ -78,8 +83,44 @@ def _generate_chart_title(app_state): def register_chart_callbacks(app): - """Register pathway data loading and chart rendering callbacks.""" + """Register tab switching, pathway data loading, and chart rendering callbacks.""" + # --- Tab switching callback --- + 
tab_inputs = [Input(tid, "n_clicks") for tid in _TAB_IDS] + tab_outputs = [Output(tid, "className") for tid in _TAB_IDS] + + @app.callback( + Output("active-tab", "data"), + *tab_outputs, + *tab_inputs, + State("active-tab", "data"), + prevent_initial_call=True, + ) + def switch_tab(*args): + """Handle tab button clicks — update active-tab store and CSS classes.""" + n_tabs = len(_TAB_IDS) + # args layout: n_clicks_0..n_clicks_N-1, current_active_tab + current_tab = args[-1] or "icicle" + + triggered_id = ctx.triggered_id + if not triggered_id: + return (no_update,) * (1 + n_tabs) + + # Determine new active tab from triggered button ID + new_tab = current_tab + for tab_id, (short_id, _) in zip(_TAB_IDS, TAB_DEFINITIONS): + if triggered_id == tab_id: + new_tab = short_id + break + + # Build CSS class outputs + base = "chart-tab" + active = f"{base} chart-tab--active" + classes = [active if short_id == new_tab else base for short_id, _ in TAB_DEFINITIONS] + + return (new_tab, *classes) + + # --- Pathway data loading callback --- @app.callback( Output("chart-data", "data"), Input("app-state", "data"), @@ -115,15 +156,19 @@ def register_chart_callbacks(app): "error": "Database query failed. Check logs for details.", } + # --- Chart rendering callback --- @app.callback( Output("pathway-chart", "figure"), Output("chart-subtitle", "children"), Input("chart-data", "data"), + Input("active-tab", "data"), Input("app-state", "data"), ) - def update_chart(chart_data, app_state): - """Render icicle chart from chart-data nodes.""" + def update_chart(chart_data, active_tab, app_state): + """Render the active tab's chart from chart-data nodes.""" + active_tab = active_tab or "icicle" chart_type = (app_state or {}).get("chart_type", "directory") + if chart_type == "indication": subtitle = "Trust \u2192 Indication \u2192 Drug \u2192 Patient Pathway" else: @@ -142,9 +187,15 @@ def register_chart_callbacks(app): "Try adjusting your filters." 
), subtitle - from visualization.plotly_generator import create_icicle_from_nodes + # Lazy rendering — only compute the active tab's chart + if active_tab == "icicle": + from visualization.plotly_generator import create_icicle_from_nodes - title = _generate_chart_title(app_state) if app_state else "" - fig = create_icicle_from_nodes(chart_data["nodes"], title) + title = _generate_chart_title(app_state) if app_state else "" + fig = create_icicle_from_nodes(chart_data["nodes"], title) + else: + # Placeholder for charts not yet implemented + tab_label = dict(TAB_DEFINITIONS).get(active_tab, active_tab) + fig = _empty_figure(f"{tab_label} chart — coming soon") return fig, subtitle diff --git a/dash_app/components/chart_card.py b/dash_app/components/chart_card.py index fb3043b..9eedced 100644 --- a/dash_app/components/chart_card.py +++ b/dash_app/components/chart_card.py @@ -1,19 +1,50 @@ -"""Chart card component — header and dcc.Graph for icicle chart.""" +"""Chart card component — tab bar, header, and dcc.Graph for charts.""" from dash import html, dcc +TAB_DEFINITIONS = [ + ("icicle", "Icicle"), + ("market-share", "Market Share"), + ("cost-effectiveness", "Cost Effectiveness"), + ("cost-waterfall", "Cost Waterfall"), + ("sankey", "Sankey"), + ("dosing", "Dosing"), + ("heatmap", "Heatmap"), + ("duration", "Duration"), +] + + def make_chart_card(): - """Return a chart card matching 01_nhs_classic.html structure. + """Return a chart card with tab bar and dcc.Graph. Contains: - - Header with title and dynamic subtitle (hierarchy label) + - Tab bar with 8 chart tabs (Icicle active by default) + - Header with title and dynamic subtitle - dcc.Loading wrapper around dcc.Graph for loading spinner - Chart view selection (icicle/sankey/timeline) is in the sidebar. 
""" + tab_buttons = [] + for tab_id, label in TAB_DEFINITIONS: + is_active = tab_id == "icicle" + class_name = "chart-tab chart-tab--active" if is_active else "chart-tab" + tab_buttons.append( + html.Button( + label, + id=f"tab-{tab_id}", + className=class_name, + n_clicks=0, + ) + ) + return html.Section( className="chart-card", **{"aria-label": "Patient pathway chart"}, children=[ + # Tab bar + html.Div( + className="chart-card__tabs", + role="tablist", + children=tab_buttons, + ), # Card header html.Div( className="chart-card__header", diff --git a/dash_app/components/sidebar.py b/dash_app/components/sidebar.py index 47d49fd..777a43a 100644 --- a/dash_app/components/sidebar.py +++ b/dash_app/components/sidebar.py @@ -19,9 +19,6 @@ def _svg_icon(svg_body): # SVG icon bodies (Feather-style) _ICONS = { "pathway": '', - "icicle": '', - "sankey": '', - "timeline": '', } @@ -39,16 +36,6 @@ def make_sidebar(): _sidebar_item("Pathway Overview", "pathway", active=True), ], ), - # Chart views section - html.Div( - className="sidebar__section", - children=[ - html.Div("Chart Views", className="sidebar__label"), - _sidebar_item("Icicle Chart", "icicle", active=True), - _sidebar_item("Sankey Diagram", "sankey", disabled=True), - _sidebar_item("Timeline", "timeline", disabled=True), - ], - ), # Footer html.Div( className="sidebar__footer", @@ -62,13 +49,11 @@ def make_sidebar(): ) -def _sidebar_item(label, icon_key, active=False, disabled=False, item_id=None): +def _sidebar_item(label, icon_key, active=False, item_id=None): """Create a single sidebar navigation item.""" class_name = "sidebar__item" if active: class_name += " sidebar__item--active" - if disabled: - class_name += " sidebar__item--disabled" props = {"className": class_name} if item_id: diff --git a/src/data_processing/parsing.py b/src/data_processing/parsing.py new file mode 100644 index 0000000..0ff94a3 --- /dev/null +++ b/src/data_processing/parsing.py @@ -0,0 +1,117 @@ +"""Parsing utilities for pathway node 
data. + +Shared functions for extracting structured data from pathway_nodes columns. +Used by analytics chart callbacks in dash_app/callbacks/. +""" +import re + + +def parse_average_spacing(spacing_html): + """Extract dosing information from average_spacing HTML string. + + Args: + spacing_html: HTML like '
DRUG
On average given 35.6 times + with a 9.0 weekly interval (320.0 weeks total treatment length)' + May contain multiple drug entries separated by
. + + Returns: + List of dicts with keys: drug_name, dose_count, weekly_interval, total_weeks. + Returns empty list for None/empty input or unparseable strings. + """ + if not spacing_html: + return [] + + results = [] + pattern = ( + r"([^<]+)
" + r"On average given ([\d.]+) times " + r"with a ([\d.]+) weekly interval " + r"\(([\d.]+) weeks total treatment length\)" + ) + + for match in re.finditer(pattern, spacing_html): + results.append({ + "drug_name": match.group(1).strip(), + "dose_count": float(match.group(2)), + "weekly_interval": float(match.group(3)), + "total_weeks": float(match.group(4)), + }) + + return results + + +def parse_pathway_drugs(ids, level): + """Extract ordered drug list from the ids column at level 4+. + + Args: + ids: String like 'ROOT - TRUST - DIR - DRUG_A - DRUG_B - DRUG_C'. + Segments are separated by ' - '. Drug names start at index 3 + (0=root, 1=trust, 2=directory, 3+=drugs). + level: Node level. Only meaningful for level >= 3. + + Returns: + List of drug names in treatment order. Empty list for level < 3 + or invalid input. + """ + if not ids or level < 3: + return [] + + segments = ids.split(" - ") + # Segments: [root, trust, directory, drug_0, drug_1, ...] + if len(segments) <= 3: + return [] + + return segments[3:] + + +def calculate_retention_rate(nodes): + """Calculate pathway retention rates from node data. + + For each N-drug pathway, calculate what % of patients do NOT escalate + to an N+1 drug pathway. This identifies effective treatment sequences. + + Args: + nodes: List of dicts with 'ids', 'level', 'value' keys. + Should contain level 3+ nodes from a single directorate. 
+ + Returns: + Dict mapping pathway ids to retention info: + {ids: {"retained_patients": int, "total_patients": int, + "retention_rate": float, "drug_sequence": list}} + """ + if not nodes: + return {} + + # Index nodes by ids for parent lookup + node_map = {n["ids"]: n for n in nodes if n.get("ids")} + + results = {} + for node in nodes: + level = node.get("level", 0) + if level < 4: + continue + + node_ids = node.get("ids", "") + total_patients = node.get("value", 0) + if not total_patients: + continue + + # Find child pathways (nodes whose ids start with this node's ids + " - ") + child_prefix = node_ids + " - " + child_patients = sum( + n.get("value", 0) + for n in nodes + if n.get("ids", "").startswith(child_prefix) and n.get("level", 0) == level + 1 + ) + + retained = total_patients - child_patients + retention_rate = (retained / total_patients * 100) if total_patients > 0 else 0.0 + + results[node_ids] = { + "retained_patients": retained, + "total_patients": total_patients, + "retention_rate": round(retention_rate, 1), + "drug_sequence": parse_pathway_drugs(node_ids, level), + } + + return results