feat: duration vs cost scatter plot tab (Task C.3)

This commit is contained in:
Andrew Charlwood
2026-02-07 03:25:39 +00:00
parent d4a2dea497
commit d8df41619d
6 changed files with 232 additions and 8 deletions
+76
View File
@@ -1209,6 +1209,82 @@ def get_pathway_depth_distribution(
conn.close()
def get_duration_cost_scatter(
    db_path: Path,
    date_filter_id: str,
    chart_type: str,
    directory: Optional[str] = None,
    trust: Optional[str] = None,
) -> list[dict]:
    """Return level 3 drug nodes with avg_days and cost_pp_pa for a scatter plot.

    Rows are read from ``pathway_nodes`` for the given date filter and chart
    type, optionally narrowed to one directory and/or one trust. Rows that
    share the same (directory, drug) pair are merged into a single point
    using patient-weighted averages of ``avg_days`` and ``cost_pp_pa``.

    Args:
        db_path: Location of the SQLite database file.
        date_filter_id: Value matched against the ``date_filter_id`` column.
        chart_type: Value matched against the ``chart_type`` column.
        directory: If non-empty, keep only rows with this ``directory``.
        trust: If non-empty, keep only rows with this ``trust_name``.

    Returns:
        A list of dicts ``{drug, directory, avg_days, cost_pp_pa, patients}``
        in first-seen order of each (directory, drug) pair. ``avg_days`` is
        rounded to 1 decimal place and ``cost_pp_pa`` to 0 decimal places.
        Rows with NULL ``avg_days``/``cost_pp_pa``, a zero/NULL patient count,
        or a zero duration are excluded. An empty list is returned (and a
        warning is logged) when the database cannot be opened or queried.
    """
    import logging

    where = [
        "date_filter_id = ?",
        "chart_type = ?",
        "level = 3",
        "avg_days IS NOT NULL",
        "cost_pp_pa IS NOT NULL",
    ]
    params: list = [date_filter_id, chart_type]
    if directory:
        where.append("directory = ?")
        params.append(directory)
    if trust:
        where.append("trust_name = ?")
        params.append(trust)

    # Only the fixed fragments above are interpolated into the SQL text;
    # every caller-supplied value is passed as a bound parameter.
    query = f"""
        SELECT labels AS drug, directory,
               value AS patients, avg_days, cost_pp_pa
        FROM pathway_nodes
        WHERE {' AND '.join(where)}
    """

    # Open the connection inside the handled region so that a failure to open
    # the database degrades to an empty result exactly like a failed query,
    # instead of escaping as an exception.
    conn = None
    try:
        conn = sqlite3.connect(str(db_path))
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query, params).fetchall()
    except sqlite3.Error as exc:
        # Best-effort contract: callers get an empty dataset, but the failure
        # is recorded rather than silently discarded.
        logging.getLogger(__name__).warning(
            "Duration/cost scatter query failed for %s: %s", db_path, exc
        )
        return []
    finally:
        if conn is not None:
            conn.close()

    # Aggregate across trusts: patient-weighted sums per (directory, drug).
    agg: dict[tuple, dict] = {}
    for r in rows:
        dir_name = r["directory"] or ""
        patients = r["patients"] or 0
        # NOTE(review): _safe_float is defined elsewhere in this file;
        # presumably it coerces to float with a 0.0 fallback - confirm.
        days = _safe_float(r["avg_days"])
        cost = _safe_float(r["cost_pp_pa"])
        if patients == 0 or days == 0:
            # A zero weight or zero duration cannot contribute a usable point.
            continue
        key = (dir_name, r["drug"])
        if key not in agg:
            agg[key] = {
                "drug": r["drug"],
                "directory": dir_name,
                "weighted_days": 0.0,
                "weighted_cost": 0.0,
                "total_patients": 0,
            }
        bucket = agg[key]
        bucket["weighted_days"] += days * patients
        bucket["weighted_cost"] += cost * patients
        bucket["total_patients"] += patients

    # Convert the weighted sums back into averages; the > 0 guard protects the
    # division if the accumulated weight is not positive.
    return [
        {
            "drug": v["drug"],
            "directory": v["directory"],
            "avg_days": round(v["weighted_days"] / v["total_patients"], 1),
            "cost_pp_pa": round(v["weighted_cost"] / v["total_patients"], 0),
            "patients": v["total_patients"],
        }
        for v in agg.values()
        if v["total_patients"] > 0
    ]
def get_directorate_summary(
db_path: Path,
date_filter_id: str,
+106
View File
@@ -1891,3 +1891,109 @@ def create_pathway_depth_figure(
fig.update_layout(**layout)
return fig
def create_duration_cost_scatter_figure(
    data: list[dict],
    title: str = "",
) -> go.Figure:
    """Build a bubble scatter of treatment duration against annual cost.

    Every entry in ``data`` becomes one marker: x is ``avg_days``, y is
    ``cost_pp_pa``, marker size grows with ``patients`` and colour is chosen
    per ``directory``. One trace is added per directory so the legend groups
    markers by directory. Dashed lines are drawn at the median duration and
    the median cost of all points, splitting the plot into four regions.

    Args:
        data: Dicts carrying the keys ``drug``, ``directory``, ``avg_days``,
            ``cost_pp_pa`` and ``patients``.
        title: Optional suffix appended to the chart title.

    Returns:
        The populated figure, or an empty ``go.Figure()`` when ``data`` is
        empty.
    """
    import statistics

    if not data:
        return go.Figure()

    if title:
        display_title = f"Duration vs Cost — {title}"
    else:
        display_title = "Duration vs Cost"

    # Sorted directory names give a stable colour assignment; the palette
    # wraps around when there are more directories than colours.
    directories = sorted({row["directory"] for row in data})
    dir_colors = {}
    for idx, dir_name in enumerate(directories):
        dir_colors[dir_name] = DRUG_PALETTE[idx % len(DRUG_PALETTE)]

    # Sizes are scaled against the largest patient count in the whole
    # dataset so markers are comparable across directories.
    global_max_p = max(row["patients"] for row in data) or 1

    def _marker_size(patient_count):
        # Linear scale from 8 up to 40, clamped at both ends.
        return max(8, min(40, 8 + 32 * (patient_count / global_max_p)))

    # Bucket rows by directory once, keeping their original order.
    rows_by_directory = {dir_name: [] for dir_name in directories}
    for row in data:
        rows_by_directory[row["directory"]].append(row)

    hover = (
        "<b>%{text}</b><br>"
        "Directory: %{customdata[1]}<br>"
        "Avg duration: %{customdata[2]} days<br>"
        "Cost p.a.: £%{customdata[3]:,.0f}<br>"
        "Patients: %{customdata[0]:,}<br>"
        "<extra></extra>"
    )

    fig = go.Figure()
    for dir_name in directories:
        members = rows_by_directory[dir_name]
        xs, ys, labels, sizes, custom = [], [], [], [], []
        for row in members:
            xs.append(row["avg_days"])
            ys.append(row["cost_pp_pa"])
            labels.append(row["drug"])
            sizes.append(_marker_size(row["patients"]))
            custom.append(
                [row["patients"], row["directory"], row["avg_days"], row["cost_pp_pa"]]
            )
        marker_style = dict(
            size=sizes,
            color=dir_colors[dir_name],
            opacity=0.75,
            line=dict(width=1, color="white"),
        )
        trace = go.Scatter(
            x=xs,
            y=ys,
            mode="markers",
            name=dir_name,
            marker=marker_style,
            text=labels,
            customdata=custom,
            hovertemplate=hover,
        )
        fig.add_trace(trace)

    # Quadrant guides: medians are taken over every point, not per directory.
    med_days = statistics.median([row["avg_days"] for row in data])
    med_cost = statistics.median([row["cost_pp_pa"] for row in data])
    fig.add_hline(
        y=med_cost,
        line_dash="dash",
        line_color=ANNOTATION_COLOR,
        line_width=1,
        annotation_text=f"Median £{med_cost:,.0f}",
        annotation_position="top left",
        annotation_font=dict(size=10, color=ANNOTATION_COLOR, family=CHART_FONT_FAMILY),
    )
    fig.add_vline(
        x=med_days,
        line_dash="dash",
        line_color=ANNOTATION_COLOR,
        line_width=1,
        annotation_text=f"Median {med_days:.0f} days",
        annotation_position="top right",
        annotation_font=dict(size=10, color=ANNOTATION_COLOR, family=CHART_FONT_FAMILY),
    )

    # Legend placement and margins come from shared helpers keyed on the
    # number of legend entries (helpers are defined elsewhere in this module).
    n_dirs = len(directories)
    legend = _smart_legend(n_dirs, "Directory")
    legend_margins = _smart_legend_margin(n_dirs)

    x_axis = dict(
        title="Average Treatment Duration (days)",
        gridcolor=GRID_COLOR,
        zeroline=False,
    )
    y_axis = dict(
        title="Cost per Patient per Annum (£)",
        gridcolor=GRID_COLOR,
        automargin=True,
        zeroline=False,
    )

    layout = _base_layout(display_title)
    layout.update(
        margin=dict(t=60, l=8, **legend_margins),
        xaxis=x_axis,
        yaxis=y_axis,
        legend=legend,
        height=550,
    )
    fig.update_layout(**layout)
    return fig