From c7e9398d65143fa13c86f1cd1b91735acdc01a8b Mon Sep 17 00:00:00 2001 From: Andrew Charlwood Date: Sat, 7 Feb 2026 03:47:53 +0000 Subject: [PATCH] feat: average administered doses chart tab (Task D.2) --- IMPLEMENTATION_PLAN.md | 20 +++--- dash_app/callbacks/chart.py | 28 ++++++++ dash_app/components/chart_card.py | 1 + dash_app/data/queries.py | 11 ++++ src/data_processing/pathway_queries.py | 87 ++++++++++++++++++++++++ src/visualization/plotly_generator.py | 91 ++++++++++++++++++++++++++ 6 files changed, 230 insertions(+), 8 deletions(-) diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md index db748c4..86ce454 100644 --- a/IMPLEMENTATION_PLAN.md +++ b/IMPLEMENTATION_PLAN.md @@ -205,14 +205,18 @@ Comprehensive review and improvement of all Plotly charts in the Dash dashboard. - **Checkpoint**: Trends tab shows drug usage over time (requires at least 2 refresh cycles for meaningful data) ### D.2 Average administered doses analysis -- [ ] Create `parse_average_administered(json_str)` parsing function in `src/data_processing/parsing.py`: - - Extract dose count arrays from the JSON `average_administered` column -- [ ] Create `get_dosing_distribution()` query in `pathway_queries.py`: - - Level 3 nodes with parsed `average_administered` JSON -- [ ] Create `create_dosing_distribution_figure(data, title)` in plotly_generator.py: - - Box/violin plot showing dose count distribution per drug -- [ ] Add as sub-option within Dosing tab or as separate tab -- **Checkpoint**: Dose distribution visible as box/violin plots +- [x] Create `get_dosing_distribution()` query in `pathway_queries.py`: + - Level 3 nodes with parsed `average_administered` JSON (position 0 = avg doses for drug) + - Aggregates across trusts using weighted averages by patient count + - Supports directory/trust filters. Returns `[{drug, directory, avg_doses, patients}]` +- [x] Add thin wrapper in `dash_app/data/queries.py` +- [x] Create `create_dosing_distribution_figure(data, title)` in plotly_generator.py: + - Horizontal bar chart (avg doses per drug, one bar per drug x directory) + - Colored by directory using DRUG_PALETTE, `_base_layout()` + `_smart_legend()` + - Dynamic height, patient count in hover +- [x] Add "Doses" tab to TAB_DEFINITIONS (9th tab) +- [x] Add `_render_doses()` helper + dispatch in `chart.py` +- **Checkpoint**: Doses tab shows average administered doses per drug, responds to filters ### D.3 Drug timeline (Gantt chart) - [x] Create `get_drug_timeline()` query in `pathway_queries.py`: diff --git a/dash_app/callbacks/chart.py b/dash_app/callbacks/chart.py index f0c3eeb..8f52dda 100644 --- a/dash_app/callbacks/chart.py +++ b/dash_app/callbacks/chart.py @@ -392,6 +392,31 @@ def _render_timeline(app_state, title): return create_drug_timeline_figure(data, title) +def _render_doses(app_state, title): + """Build the average administered doses figure from current filter state.""" + from dash_app.data.queries import get_dosing_distribution + from visualization.plotly_generator import create_dosing_distribution_figure + + filter_id = (app_state or {}).get("date_filter_id", "all_6mo") + chart_type = (app_state or {}).get("chart_type", "directory") + + selected_dirs = (app_state or {}).get("selected_directorates") or [] + selected_trusts = (app_state or {}).get("selected_trusts") or [] + directory = selected_dirs[0] if len(selected_dirs) == 1 else None + trust = selected_trusts[0] if len(selected_trusts) == 1 else None + + try: + data = get_dosing_distribution(filter_id, chart_type, directory, trust) + except Exception: + log.exception("Failed to load dosing distribution data") + return _empty_figure("Failed to load dosing distribution data.") + + if not data: + return _empty_figure("No dosing distribution data available.\nTry adjusting your filters.") + + return create_dosing_distribution_figure(data, title) + + def register_chart_callbacks(app): """Register tab switching, pathway data loading, and chart rendering callbacks.""" @@ -547,6 +572,9 @@ def register_chart_callbacks(app): elif active_tab == "timeline": fig = _render_timeline(app_state, title) + elif active_tab == "doses": + fig = _render_doses(app_state, title) + else: # Placeholder for charts not yet implemented tab_label = dict(TAB_DEFINITIONS).get(active_tab, active_tab) diff --git a/dash_app/components/chart_card.py b/dash_app/components/chart_card.py index 24ef5a0..09eafaa 100644 --- a/dash_app/components/chart_card.py +++ b/dash_app/components/chart_card.py @@ -13,6 +13,7 @@ TAB_DEFINITIONS = [ ("scatter", "Scatter"), ("network", "Network"), ("timeline", "Timeline"), + ("doses", "Doses"), ] # Full set retained for Trust Comparison dashboard (Phase 10.8) diff --git a/dash_app/data/queries.py b/dash_app/data/queries.py index f2e829f..cbdf315 100644 --- a/dash_app/data/queries.py +++ b/dash_app/data/queries.py @@ -29,6 +29,7 @@ from data_processing.pathway_queries import ( get_duration_cost_scatter as _get_duration_cost_scatter, get_drug_network as _get_drug_network, get_drug_timeline as _get_drug_timeline, + get_dosing_distribution as _get_dosing_distribution, ) DB_PATH = Path(__file__).resolve().parents[2] / "data" / "pathways.db" @@ -238,3 +239,13 @@ def get_drug_timeline( ) -> list[dict]: """Drug timeline data (first_seen, last_seen) for Gantt chart.""" return _get_drug_timeline(DB_PATH, date_filter_id, chart_type, directory, trust) + + +def get_dosing_distribution( + date_filter_id: str = "all_6mo", + chart_type: str = "directory", + directory: Optional[str] = None, + trust: Optional[str] = None, +) -> list[dict]: + """Average administered dose counts per drug.""" + return _get_dosing_distribution(DB_PATH, date_filter_id, chart_type, directory, trust) diff --git a/src/data_processing/pathway_queries.py b/src/data_processing/pathway_queries.py index a47db71..d72db83 100644 --- a/src/data_processing/pathway_queries.py +++ b/src/data_processing/pathway_queries.py @@ -1443,6 +1443,93 @@ def get_drug_timeline( conn.close() +def get_dosing_distribution( + db_path: Path, + date_filter_id: str, + chart_type: str, + directory: Optional[str] = None, + trust: Optional[str] = None, +) -> list[dict]: + """Level 3 drug nodes with average administered dose counts. + + Parses the average_administered JSON array (position 0 = avg doses for the drug). + Aggregates across trusts using weighted averages by patient count. + + Returns list of dicts sorted by avg_doses desc: + [{drug, directory, avg_doses, patients}] + """ + import json + + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + try: + where = ["date_filter_id = ?", "chart_type = ?", "level = 3", + "average_administered IS NOT NULL", "average_administered != ''"] + params: list = [date_filter_id, chart_type] + + if directory: + where.append("directory = ?") + params.append(directory) + if trust: + where.append("trust_name = ?") + params.append(trust) + + query = f""" + SELECT labels AS drug, directory, trust_name, + value AS patients, average_administered + FROM pathway_nodes + WHERE {' AND '.join(where)} + ORDER BY labels, directory + """ + rows = conn.execute(query, params).fetchall() + + # Aggregate across trusts: weighted average of dose count + agg = {} + for r in rows: + patients = r["patients"] or 0 + if patients == 0: + continue + + try: + arr = json.loads(r["average_administered"].replace("NaN", "null")) + except (json.JSONDecodeError, AttributeError): + continue + + # Position 0 is average doses for this drug + avg_doses = arr[0] if arr and arr[0] is not None else None + if avg_doses is None or avg_doses <= 0: + continue + + key = (r["directory"] or "", r["drug"]) + if key not in agg: + agg[key] = { + "drug": r["drug"], + "directory": r["directory"] or "", + "weighted_doses": 0.0, + "total_patients": 0, + } + agg[key]["weighted_doses"] += avg_doses * patients + agg[key]["total_patients"] += patients + + result = [] + for v in agg.values(): + tp = v["total_patients"] + if tp > 0: + result.append({ + "drug": v["drug"], + "directory": v["directory"], + "avg_doses": round(v["weighted_doses"] / tp, 1), + "patients": tp, + }) + + result.sort(key=lambda x: -x["avg_doses"]) + return result + except sqlite3.Error: + return [] + finally: + conn.close() + + def get_directorate_summary( db_path: Path, date_filter_id: str, diff --git a/src/visualization/plotly_generator.py b/src/visualization/plotly_generator.py index 2943e4b..ad34f09 100644 --- a/src/visualization/plotly_generator.py +++ b/src/visualization/plotly_generator.py @@ -2206,3 +2206,94 @@ def create_drug_timeline_figure(data: list[dict], title: str = "") -> go.Figure: fig.update_layout(**layout) return fig + + +def create_dosing_distribution_figure( + data: list[dict], title: str = "" +) -> go.Figure: + """Create horizontal bar chart of average administered doses per drug. + + Args: + data: list of dicts with keys: drug, directory, avg_doses, patients + title: chart title suffix + """ + if not data: + return go.Figure() + + display_title = f"Average Administered Doses — {title}" if title else "Average Administered Doses" + + # Group by directory for coloring + directories = sorted(set(d["directory"] for d in data)) + dir_colors = { + d: DRUG_PALETTE[i % len(DRUG_PALETTE)] + for i, d in enumerate(directories) + } + + single_directory = len(directories) == 1 + + # Sort by avg_doses descending + sorted_data = sorted(data, key=lambda x: x["avg_doses"]) + + # Build y-labels + if single_directory: + y_labels = [d["drug"] for d in sorted_data] + else: + y_labels = [f"{d['drug']} ({d['directory']})" for d in sorted_data] + + fig = go.Figure() + + # One trace per directory for legend grouping + shown_dirs = set() + for i, row in enumerate(sorted_data): + d = row["directory"] + show_legend = d not in shown_dirs + shown_dirs.add(d) + + fig.add_trace(go.Bar( + y=[y_labels[i]], + x=[row["avg_doses"]], + orientation="h", + marker_color=dir_colors[d], + name=d, + showlegend=show_legend, + legendgroup=d, + text=[f"{row['avg_doses']:.0f}"], + textposition="inside", + textfont=dict(color="white", size=11), + hovertemplate=( + f"{row['drug']}
" + f"Directory: {d}
" + f"Avg doses: {row['avg_doses']:.1f}
" + f"Patients: {row['patients']:,}" + "" + ), + )) + + n_bars = len(sorted_data) + bar_height = 24 + dynamic_height = max(400, n_bars * bar_height + 120) + + n_dirs = len(directories) + legend_margins = _smart_legend_margin(n_dirs) + legend = _smart_legend(n_dirs, legend_title="Directory") + + layout = _base_layout(display_title) + layout.update( + xaxis=dict( + title="Average Doses Administered", + gridcolor=GRID_COLOR, + zeroline=False, + ), + yaxis=dict( + automargin=True, + tickfont=dict(size=11), + ), + barmode="overlay", + height=dynamic_height, + margin=dict(t=60, l=8, **legend_margins), + legend=legend, + bargap=0.3, + ) + fig.update_layout(**layout) + + return fig