import webbrowser
from itertools import groupby
import os
from typing import Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go

from core import AnalysisFilters, PathConfig, default_paths
from core.logging_config import get_logger
from tools import data

# Import refactored analysis functions
from analysis.pathway_analyzer import (
    generate_icicle_chart as _generate_icicle_chart,
    prepare_data as _prepare_data,
    calculate_statistics as _calculate_statistics,
    build_hierarchy as _build_hierarchy,
    prepare_chart_data as _prepare_chart_data,
)

# Import visualization functions
from visualization.plotly_generator import (
    create_icicle_figure as _create_icicle_figure,
    save_figure_html as _save_figure_html,
    figure_legacy as _figure_legacy,
)

logger = get_logger(__name__)

# Silence pandas' SettingWithCopyWarning for the whole process (default='warn').
pd.options.mode.chained_assignment = None


def human_format(num):
    """
    Format a number with three significant figures and a K/M/B/T suffix.

    Values beyond the largest suffix are expressed in that suffix
    (e.g. 1e15 -> "1000T") instead of running off the end of the suffix list.

    Args:
        num: Number to format

    Returns:
        String such as "1.23K", "45M" or "0"
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # The cap keeps the suffix lookup in range and stops the loop for inf.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    digits = '{:f}'.format(num).rstrip('0').rstrip('.')
    return '{}{}'.format(digits, suffixes[magnitude])


def main(dir, paths: Optional[PathConfig] = None):
    """
    Load and process patient intervention data from a file.

    Uses the FileDataLoader abstraction to handle CSV/Parquet file loading
    with all necessary transformations (patient_id, drug_names,
    department_identification).

    Args:
        dir: Path to CSV or Parquet file
        paths: PathConfig for reference data locations (uses default_paths if None)

    Returns:
        DataFrame with processed patient intervention data
    """
    # Imported lazily, as in the original; presumably to avoid an import cycle.
    from data_processing.loader import FileDataLoader

    if paths is None:
        paths = default_paths

    loader = FileDataLoader(file_path=dir, paths=paths)
    result = loader.load()
    logger.info("Initial data processing complete.")
    return result.df


def drop_duplicate_treatments(df, ascending):
    """
    Keep one row per "UPIDTreatment" value: the earliest or the latest.

    Side effect: ``df`` itself is sorted in place by "Intervention Date" in
    the requested direction. generate_graph() relies on its frame being left
    in ascending date order after the second call.

    Args:
        df: DataFrame with "Intervention Date" and "UPIDTreatment" columns
        ascending: True keeps the first intervention per treatment,
            False keeps the last one

    Returns:
        DataFrame of the kept rows, always in ascending date order
    """
    df.sort_values(by=['Intervention Date'], ascending=ascending, inplace=True)
    df_treatment_steps = df.drop_duplicates(subset="UPIDTreatment", keep="first")
    if not ascending:
        df_treatment_steps = df_treatment_steps.sort_values(by=['Intervention Date'], ascending=True)
    return df_treatment_steps


def row_function(row):
    """
    Encode one patient's pathway as a "parents,label,ids" string.

    The row's fields are joined with " - " underneath the fixed root
    "N&WICS". ``ids`` is the full path, ``parents`` is the path without the
    final element and ``label`` is the field at the last counted position.

    Args:
        row: Series whose fields, in order, form the hierarchy levels

    Returns:
        Comma-separated "parents,label,ids" string. A comma inside any field
        would break the later ``split(',')`` in generate_graph().
    """
    ids = ""
    parents = "N&WICS"
    count = row.count()  # number of non-null fields
    for c in range(count):
        v = row.iloc[c]
        if not isinstance(v, str):
            # Non-string (e.g. missing) entry: fall through to the next field.
            v = row.iloc[c + 1]
        if c == count - 1:
            ids = parents + " - " + v
            continue
        parents += " - " + v
    label = row.iloc[count - 1]
    return parents + "," + label + "," + ids


def count_list_values(x):
    """
    Count occurrences of each distinct value in ``x``.

    Args:
        x: Iterable of mutually sortable values

    Returns:
        List of counts, ordered by the sorted distinct values
    """
    return [sum(1 for _ in group) for _, group in groupby(sorted(x))]


def sum_list_values(x):
    """
    Sum slices of the "Price Actual" list, one slice per "Drug Name" count.

    Args:
        x: Row with "Drug Name" (list of counts) and "Price Actual"
            (list of prices)

    Returns:
        List with one total per count
    """
    counts = x["Drug Name"]
    prices = x["Price Actual"]
    totals = []
    for position, size in enumerate(counts):
        # NOTE(review): the slice starts at the *previous* count rather than
        # at the running total of all earlier counts, so slices overlap or
        # skip prices once there are three or more drugs. Behavior kept
        # as-is; confirm whether cumulative offsets were intended.
        start = 0 if position == 0 else counts[position - 1]
        totals.append(sum(prices[start:start + size]))
    return totals


def remove_nan_string(y):
    """Return the items of ``y`` whose string form is not 'nan'."""
    return [item for item in y if str(item) != 'nan']


def min_max_treatment_dates(ice_df, row):
    """
    Find the earliest "First seen" and latest "Last seen" under a node.

    A row belongs to the node when its "ids" path contains the node's path,
    so the node itself and its descendants are included.

    Args:
        ice_df: Chart DataFrame with "ids", "First seen" and "Last seen"
        row: Row of ``ice_df``; its third field is read as the node id
            ("ids" in the frame built by generate_graph)

    Returns:
        "YYYY-MM-DD,YYYY-MM-DD" string (min first seen, max last seen)
    """
    ids = row.iloc[2]
    # regex=False: the path is literal text, not a pattern. Names containing
    # characters such as "(" or "+" would otherwise mis-match or raise.
    min_max = ice_df[ice_df["ids"].str.contains(ids, regex=False)]
    min_date = str(min_max["First seen"].min().strftime('%Y-%m-%d'))
    max_date = str(min_max["Last seen"].max().strftime('%Y-%m-%d'))
    return min_date + ',' + max_date


def start_date_drug(df, x):
    """
    Look up an intervention date for each drug in a patient's row.

    Args:
        df: DataFrame indexed by UPID + drug name with an
            "Intervention Date" column
        x: Row of drug names whose ``name`` is the patient's UPID

    Returns:
        List of dates, one per non-null entry in ``x``
    """
    drug_count = int(x.notna().sum())
    dates = []
    for d in range(drug_count):
        lookup_key = str(x.name) + str(x.iloc[d])
        dates.append(df.loc[lookup_key, "Intervention Date"])
    return dates


def end_date_drug(df, x):
    """
    Like start_date_drug(), but for one entry fewer.

    Args:
        df: DataFrame indexed by UPID + drug name with an
            "Intervention Date" column
        x: Row whose ``name`` is the patient's UPID

    Returns:
        List of dates for the first ``non-null count - 1`` entries
    """
    # Need to -1 from drug count as start date gets counted from notnull above
    drug_count = int(x.notna().sum()) - 1
    dates = []
    for d in range(drug_count):
        lookup_key = str(x.name) + str(x.iloc[d])
        dates.append(df.loc[lookup_key, "Intervention Date"])
    return dates


def list_to_string(x):
    """
    Build the hover text describing the dosing of every drug in a pathway.

    Args:
        x: Chart row with ``ids`` (" - " separated path), ``average_cost``,
            ``average_spacing`` (days) and ``average_administered`` lists

    Returns:
        One "<drug> / On average given N times with a W weekly interval"
        fragment per drug, concatenated
    """
    path_parts = x.ids.split(' - ')
    # The drugs are the trailing path elements, one per average_cost entry.
    drug_list = path_parts[len(path_parts) - len(x.average_cost):]

    # NOTE(review): SOURCE shows a raw line break inside these literals,
    # which is not valid Python. They are restored here as the HTML "<br>"
    # tag Plotly uses for line breaks. Confirm against the original file
    # (other tags, e.g. bold, may also have been stripped).
    #
    # The original had an if/else on spacing, doses and patient count, but
    # both branches built this same text. They differed only in a
    # cost-per-annum line that was commented out, so they are merged.
    fragments = []
    for y in range(len(x.average_cost)):
        doses = round(x.average_administered[y], 1)
        weekly_interval = int(x.average_spacing[y]) / 7
        fragments.append(
            "<br>" + str(drug_list[y]) + "<br>On average given " + str(doses)
            + " times with a " + str(round(weekly_interval, 1)) + " weekly interval ("
            + str(round(weekly_interval * doses, 0)) + " weeks total treatment length)"
        )
    return "".join(fragments)


def drug_frequency_average(x):
    """
    Compute the average gap in days between doses of each drug.

    For drug ``d`` the gap is (end_date_d - start_date_d) / (freq_d - 1).
    It is 0 when the drug was given once or the duration is not positive.

    Args:
        x: Row with ``drug_<d>``, ``freq_<d>``, ``start_date_<d>`` and
            ``end_date_<d>`` fields

    Returns:
        List of average gaps, one per ``drug_`` field
    """
    drug_count = x.index.str.contains("drug_").sum()
    spacings = []
    for d in range(drug_count):
        suffix = str(d)
        spacing = 0
        if x["freq_" + suffix] > 1:
            duration = (x["end_date_" + suffix] - x["start_date_" + suffix]) / np.timedelta64(1, 'D')
            if duration > 0:
                spacing = duration / (x["freq_" + suffix] - 1)
        spacings.append(spacing)
    return spacings


def cost_pp_pa(x):
    """
    Compute cost per patient per annum for a chart row.

    Args:
        x: Row with ``avg_days`` (timedelta) and ``costpp`` (cost per patient)

    Returns:
        Cost per 365 days rounded to 2 decimals, as a string, or "N/A" when
        the average treatment length is missing or not positive
    """
    days_treated = x["avg_days"] / np.timedelta64(1, 'D')
    if days_treated > 0:
        return str(round(x["costpp"] / (days_treated / 365), 2))
    return "N/A"


def _unwrap_lists(list_column, prefix, index=None):
    """Expand a Series of lists into columns ``<prefix>0``, ``<prefix>1``, ..."""
    if index is None:
        index = list_column.index
    return pd.DataFrame(list_column.values.tolist(), index=index).add_prefix(prefix)


def generate_graph(
    df1,
    start_date=None,
    end_date=None,
    last_seen=None,
    save_dir=None,
    trustFilter=None,
    drugFilter=None,
    directorateFilter=None,
    title=None,
    minimum_num_patients=None,
    *,
    filters: Optional[AnalysisFilters] = None,
    paths: Optional[PathConfig] = None,
):
    """
    Generate patient pathway icicle chart.

    This function can be called in two ways:
    1. New style: Pass filters=AnalysisFilters(...) with all parameters encapsulated
    2. Legacy style: Pass individual parameters (start_date, end_date, etc.)

    If both are provided, the filters object takes precedence.

    The input frame is not modified; all work happens on a filtered copy.
    If no rows survive the filters, a warning is logged and nothing is drawn.

    Args:
        df1: DataFrame with processed patient data
        filters: AnalysisFilters object with all filter parameters (preferred)
        paths: PathConfig object for file paths (optional, uses default_paths if not provided)

        Legacy parameters (used if filters is None):
        start_date, end_date, last_seen, save_dir, trustFilter, drugFilter,
        directorateFilter, title, minimum_num_patients

    Returns:
        None. The chart is produced through figure().
    """
    # Use PathConfig for file paths
    if paths is None:
        paths = default_paths

    # Extract parameters from AnalysisFilters if provided
    if filters is not None:
        start_date = filters.start_date
        end_date = filters.end_date
        last_seen = filters.last_seen_date
        save_dir = filters.output_dir
        trustFilter = filters.trusts
        drugFilter = filters.drugs
        directorateFilter = filters.directories
        title = filters.custom_title
        minimum_num_patients = filters.minimum_patients

    # A missing threshold means "keep every pathway"; comparing against None
    # would raise a TypeError at the final filter.
    if minimum_num_patients is None:
        minimum_num_patients = 0

    org_codes = pd.read_csv(paths.org_codes_csv, index_col=1)

    # Translate provider codes to names and filter WITHOUT touching the
    # caller's frame. Overwriting "Provider Code" in place made a second
    # call on the same DataFrame map names to NaN and filter out every row.
    provider_names = df1["Provider Code"].map(org_codes["Name"])
    selected = (
        provider_names.isin(trustFilter)
        & df1["Drug Name"].isin(drugFilter)
        & df1["Directory"].isin(directorateFilter)
    )
    if not selected.any():
        logger.warning("No data found for selected filters.")
        return
    df1 = df1[selected].copy()
    df1["Provider Code"] = provider_names[selected]
    df1["UPIDTreatment"] = df1["UPID"] + df1["Drug Name"]

    # Find total cost for each patient - Total cost is ~£110Mil, about 30% is
    # unattributable to a patient (no UPID)
    total_costs = df1.groupby("UPID")[["Price Actual"]].sum().rename(columns={"Price Actual": "Total cost"})

    # Series to map directory
    directory_df = df1[["UPID", "Directory"]].drop_duplicates("UPID").set_index("UPID")

    logger.info("Filtering unrelated interventions")
    df_end_dates = drop_duplicate_treatments(df1, False)
    # This call also leaves df1 sorted by ascending date, which the
    # per-patient lists and the keep="first" de-duplication below rely on.
    df1_unique = drop_duplicate_treatments(df1, True)

    logger.info("Identifying unique patients and interventions used")
    # Create list of total number of that drug for each patient
    # NOTE(review): count_list_values() orders counts by sorted drug name,
    # while the drug_N columns follow first intervention date. Confirm that
    # freq_N is really meant to pair with drug_N.
    df_drug_freq = df1.groupby("UPID")["Drug Name"].agg(list).to_frame()
    df_drug_cost = df1.groupby("UPID")["Price Actual"].agg(list).to_frame()
    df_drug_freq["Price Actual"] = df_drug_freq.index.map(df_drug_cost["Price Actual"])
    df_drug_freq["Drug Name"] = df_drug_freq["Drug Name"].apply(count_list_values)
    df_drug_freq["Drug cost total"] = df_drug_freq.apply(sum_list_values, axis=1)

    # Aggregate interventions & dates of interventions into lists by UPID
    df_drugs = df1_unique.groupby("UPID")["Drug Name"].agg(list).to_frame()
    df_dates = df1_unique.groupby("UPID")["Intervention Date"].agg(list).to_frame()
    df_end_dates = df_end_dates.groupby("UPID")["Intervention Date"].agg(list).to_frame()

    logger.info("Calculating each unique patient's intervention average frequency, cost and duration of each intervention")
    # Unwrap the per-patient lists into one column per drug position:
    # drug names, start/end dates, dose counts, spacing and cost.
    df_dates_unwrapped = _unwrap_lists(df_dates["Intervention Date"], 'date_')
    df_end_dates_unwrapped = _unwrap_lists(df_end_dates["Intervention Date"], 'date_end_')
    df_drugs_unwrapped = _unwrap_lists(df_drugs["Drug Name"], 'drug_')
    df_freq_unwrapped = _unwrap_lists(df_drug_freq["Drug Name"], 'freq_')

    treatment_dates = df1[["UPIDTreatment", "Intervention Date"]]
    start_dates = (
        treatment_dates.sort_values(by=["Intervention Date"], ascending=True, ignore_index=True)
        .drop_duplicates(subset="UPIDTreatment")
        .set_index("UPIDTreatment")
    )
    end_dates = (
        treatment_dates.sort_values(by=["Intervention Date"], ascending=False, ignore_index=True)
        .drop_duplicates(subset="UPIDTreatment")
        .set_index("UPIDTreatment")
    )

    # Both lookups run while the frame still holds only drug_N columns,
    # because start_date_drug() counts every non-null field of the row.
    drug_start_lists = df_drugs_unwrapped.apply(lambda x: start_date_drug(start_dates, x), axis=1)
    drug_end_lists = df_drugs_unwrapped.apply(lambda x: start_date_drug(end_dates, x), axis=1)
    patient_index = df_drugs_unwrapped.index

    for extra_columns in (
        _unwrap_lists(drug_start_lists, 'start_date_', index=patient_index),
        _unwrap_lists(drug_end_lists, 'end_date_', index=patient_index),
        _unwrap_lists(df_drug_freq["Drug Name"], 'freq_', index=patient_index),
    ):
        df_drugs_unwrapped = pd.merge(df_drugs_unwrapped, extra_columns, left_index=True, right_index=True)

    spacing_lists = df_drugs_unwrapped.apply(drug_frequency_average, axis=1)
    for extra_columns in (
        _unwrap_lists(spacing_lists, 'spacing_', index=patient_index),
        _unwrap_lists(df_drug_freq["Drug cost total"], 'total_cost_drug_', index=patient_index),
    ):
        df_drugs_unwrapped = pd.merge(df_drugs_unwrapped, extra_columns, left_index=True, right_index=True)

    # Insert first & last date seen into df
    df_drugs_unwrapped.insert(0, "First seen", df_dates_unwrapped.min(axis=1))
    df_drugs_unwrapped.insert(1, "Last seen", df_end_dates_unwrapped.max(axis=1))

    # Merge info from activity data with grouped info, and total cost info
    patient_info = df1.drop_duplicates(subset="UPID", keep="first").set_index("UPID")
    for extra_columns in (df_drugs_unwrapped, df_freq_unwrapped, total_costs):
        patient_info = pd.merge(patient_info, extra_columns, left_index=True, right_index=True)

    # Filter initiation based on years provided
    patient_info = patient_info[
        (patient_info['First seen'] >= str(start_date)) & (patient_info['First seen'] < str(end_date))
    ]
    # An empty OR missing title gets the default; the legacy default is None.
    if not title:
        title = "Patients initiated from " + str(start_date) + " to " + str(end_date)

    # Filter last seen based on date provided
    patient_info = patient_info[patient_info['Last seen'] > str(last_seen)]

    # Remove patients with 0 drug, by filling blanks with NaN & dropping rows.
    # Explicit assignment: an inplace replace through attribute access does
    # not write back under pandas copy-on-write.
    patient_info["drug_0"] = patient_info["drug_0"].replace('N/A', np.nan)
    patient_info = patient_info.dropna(subset=['drug_0'])

    if patient_info.empty:
        # The hierarchy code below cannot run on an empty frame.
        logger.warning("No patients found for the selected date range.")
        return

    # Calculate duration of treatment
    patient_info['Days treated'] = patient_info["Last seen"] - patient_info["First seen"]
    date_df = patient_info[["First seen", "Last seen", 'Days treated']]

    # Create df for ice chart with hierarchy of plot: Trust, Directory, drugs
    drug_columns = [col for col in patient_info.columns if col.startswith('drug_')]
    upid_drugs_df = patient_info[drug_columns].copy()
    upid_drugs_df.insert(0, "Trust", upid_drugs_df.index.str[:3])
    upid_drugs_df.insert(1, "Directory", upid_drugs_df.index)
    upid_drugs_df["Trust"] = upid_drugs_df["Trust"].map(org_codes["Name"])
    upid_drugs_df["Directory"] = upid_drugs_df["Directory"].map(directory_df["Directory"])

    upid_drugs_df["value"] = upid_drugs_df.apply(row_function, axis=1)

    # Merge in date info
    upid_drugs_df = pd.merge(upid_drugs_df, date_df, left_index=True, right_index=True)
    upid_drugs_df["ids"] = upid_drugs_df["value"].str.split(',').str[2]

    avg_treatment_dfs = pd.DataFrame(
        upid_drugs_df.groupby("ids", as_index=False)["Days treated"].mean()).set_index("ids")
    value_dfs = pd.DataFrame(upid_drugs_df.groupby("value", as_index=False).size()).reset_index()
    first_seen_treatment_dfs = pd.DataFrame(
        upid_drugs_df.groupby("ids", as_index=False)["First seen"].min()).set_index("ids")
    last_seen_treatment_dfs = pd.DataFrame(
        upid_drugs_df.groupby("ids", as_index=False)["Last seen"].max()).set_index("ids")

    # Calculate total cost for parents
    upid_drugs_df["Cost"] = upid_drugs_df.index.map(total_costs["Total cost"])
    cost_dfs = pd.DataFrame(
        upid_drugs_df.groupby("value", as_index=False)['Cost'].sum()).set_index("value", drop=True)

    # Calculate average dosing for each drug
    upid_drugs_df = pd.merge(upid_drugs_df, df_drugs_unwrapped, left_index=True, right_index=True)

    # Calculate average spacing between drugs
    spacing_average = pd.DataFrame(upid_drugs_df.groupby("value", as_index=False)[
        [col for col in upid_drugs_df.columns if 'spacing_' in col]].mean()).set_index("value", drop=True)
    spacing_average = spacing_average.round()
    spacing_average['combined'] = spacing_average.values.tolist()
    spacing_average["ids"] = spacing_average.index
    spacing_average["ids"] = spacing_average["ids"].str.split(',').str[2]
    spacing_average.set_index("ids", inplace=True)

    # Calculate average cost for each drug
    cost_average = pd.DataFrame(upid_drugs_df.groupby("value", as_index=False)[
        [col for col in upid_drugs_df.columns if 'total_cost_drug_' in col]].mean()).set_index("value", drop=True)
    cost_average = cost_average.round(2)
    cost_average['combined'] = cost_average.values.tolist()
    cost_average["ids"] = cost_average.index
    cost_average["ids"] = cost_average["ids"].str.split(',').str[2]
    cost_average.set_index("ids", inplace=True)

    # Calculate average number of doses
    freq_average = pd.DataFrame(upid_drugs_df.groupby("ids", as_index=False)[
        [col for col in upid_drugs_df.columns if 'freq_' in col]].mean()).set_index("ids", drop=True)
    freq_average['combined'] = freq_average.values.tolist()

    # Remove negative totals from "Cost" column. The earlier version mutated
    # the private _get_numeric_data() result and relied on it being a view.
    cost_dfs["Cost"] = cost_dfs["Cost"].clip(lower=0)
    value_dfs["Cost"] = value_dfs["value"].map(cost_dfs["Cost"])

    ice_df = pd.DataFrame()
    ice_df[['parents', 'labels', 'ids']] = value_dfs["value"].str.split(',', expand=True)
    ice_df["average_administered"] = ice_df["ids"].map(freq_average["combined"])
    ice_df["cost"] = value_dfs["Cost"]
    ice_df["value"] = value_dfs["size"]
    ice_df["average_cost"] = ice_df["ids"].map(cost_average["combined"])
    ice_df["average_cost"] = ice_df["average_cost"].apply(remove_nan_string)
    ice_df["average_spacing"] = ice_df["ids"].map(spacing_average["combined"])
    ice_df["average_spacing"] = ice_df["average_spacing"].apply(remove_nan_string)
    ice_df["average_spacing"] = ice_df.apply(list_to_string, axis=1)
    ice_df["average_spacing"] = ice_df["average_spacing"].str.replace("nan", "N/A")

    logger.info("Building graph dataframe structure.")
    # Add very top level of Trust
    new_row = pd.DataFrame({'parents': '', 'ids': "N&WICS", 'labels': 'N&WICS', 'value': 0, "cost": 0}, index=[0])
    ice_df = pd.concat(objs=[ice_df, new_row], ignore_index=True, axis=0)

    # Every parent path must exist as a node of its own. Add zero-valued rows
    # for the missing ones until only '' (the root's parent) is unresolved.
    # Membership is tested against the id VALUES; `x in series` would test
    # the index labels instead.
    ice_df2 = ice_df  # also correct when no parent is missing
    known_ids = set(ice_df["ids"])
    missing_parents = [p for p in ice_df["parents"].unique() if p not in known_ids]
    synthetic_rows = []
    while len(missing_parents) > 1:
        added_any = False
        for path in missing_parents:
            # Split on the real " - " separator so hyphenated names
            # (e.g. "CO-TRIMOXAZOLE") are not cut in half.
            parent_path, separator, label = path.rpartition(" - ")
            if separator:
                synthetic_rows.append(
                    {"parents": parent_path, "ids": path, "value": 0, "labels": label, "cost": 0})
                added_any = True
        if not added_any:
            # Nothing left that can be split; stop rather than loop forever.
            break
        ice_df2 = pd.concat([ice_df, pd.DataFrame(synthetic_rows)], ignore_index=True)
        known_ids = set(ice_df2["ids"])
        missing_parents = [p for p in ice_df2["parents"].unique() if p not in known_ids]

    ice_df = ice_df2.drop_duplicates("ids").copy()
    ice_df["level"] = ice_df["ids"].str.count('-')
    ice_df = ice_df[~ice_df['labels'].isin(["COST", "CHARGE", "N/A"])]
    # Deepest nodes first, so each child is rolled into its parent before
    # that parent is rolled into the grandparent.
    ice_df = ice_df.sort_values(by=["level"], ascending=False, ignore_index=True)

    for index, row in ice_df.iterrows():
        lookup_index = ice_df.index[ice_df['ids'] == row['parents']]
        ice_df.loc[lookup_index, 'value'] = ice_df.loc[lookup_index, "value"] + ice_df.loc[index, "value"]
        ice_df.loc[lookup_index, 'cost'] = ice_df.loc[lookup_index, "cost"] + ice_df.loc[index, 'cost']

    # Sum of sibling values as denominator for percentage - FOR PATIENT NUMBER COLOUR GRADING
    colour_df = pd.DataFrame(ice_df.groupby(["parents"])["value"].sum())
    ice_df['colour'] = ice_df["parents"].map(colour_df["value"])
    ice_df['colour'] = ice_df['value'] / ice_df['colour']

    ice_df['costpp'] = ice_df['cost'] / ice_df['value']

    # Treatment length info
    ice_df['avg_days'] = ice_df["ids"].map(avg_treatment_dfs["Days treated"])
    ice_df['First seen'] = ice_df["ids"].map(first_seen_treatment_dfs["First seen"])
    ice_df['Last seen'] = ice_df["ids"].map(last_seen_treatment_dfs["Last seen"])
    ice_df["dates"] = ice_df.apply(lambda x: min_max_treatment_dates(ice_df, x), axis=1)
    ice_df[['First seen (Parent)', 'Last seen (Parent)']] = ice_df["dates"].str.split(',', expand=True)

    ice_df['First seen'] = pd.to_datetime(ice_df['First seen'])
    ice_df['Last seen'] = pd.to_datetime(ice_df['Last seen'])
    ice_df["cost_pp_pa"] = ice_df.apply(cost_pp_pa, axis=1)

    # Filter out rows where value is less than minimum number of patients
    ice_df = ice_df[ice_df['value'] >= minimum_num_patients]

    logger.info("Generating graph.")
    figure(ice_df, title, save_dir)
    return


def figure(ice_df4, dir_string, save_dir):
    """
    Create and display icicle figure (legacy interface).

    This function delegates to visualization.plotly_generator.figure_legacy()
    for backward compatibility. The inline go.Icicle construction that used
    to follow here as commented-out code has been removed.

    Args:
        ice_df4: DataFrame with chart data
        dir_string: Title string (used for filename and chart title)
        save_dir: Directory to save the HTML file
    """
    _figure_legacy(ice_df4, dir_string, save_dir)
    return


def generate_graph_v2(
    df: pd.DataFrame,
    start_date: str,
    end_date: str,
    last_seen_date: str,
    save_dir: str,
    trust_filter: list[str],
    drug_filter: list[str],
    directory_filter: list[str],
    minimum_num_patients: int = 0,
    title: str = "",
    paths: Optional[PathConfig] = None,
) -> Optional[go.Figure]:
    """
    Generate patient pathway icicle chart using refactored pipeline.

    This is the modern API that uses the refactored analysis functions.
    It provides cleaner parameter names and returns the figure instead of
    automatically opening it in a browser.

    Args:
        df: DataFrame with processed patient intervention data
        start_date: Start date for patient initiation filter (YYYY-MM-DD)
        end_date: End date for patient initiation filter (YYYY-MM-DD)
        last_seen_date: Filter for patients last seen after this date
        save_dir: Directory to save the HTML file (nothing is written if empty)
        trust_filter: List of trust names to include
        drug_filter: List of drug names to include
        directory_filter: List of directories to include
        minimum_num_patients: Minimum number of patients to include a pathway
        title: Chart title (auto-generated from dates if empty)
        paths: PathConfig for file paths (uses default if None)

    Returns:
        Plotly Figure object, or None if no data
    """
    if paths is None:
        paths = default_paths

    ice_df, final_title = _generate_icicle_chart(
        df=df,
        start_date=start_date,
        end_date=end_date,
        last_seen_date=last_seen_date,
        trust_filter=trust_filter,
        drug_filter=drug_filter,
        directory_filter=directory_filter,
        minimum_num_patients=minimum_num_patients,
        title=title,
        paths=paths,
    )

    if ice_df is None or len(ice_df) == 0:
        return None

    fig = create_icicle_figure(ice_df, final_title)

    if save_dir:
        fig.write_html(f"{save_dir}/{final_title}.html")
        logger.info(f"Success! File saved to {save_dir}/{final_title}.html")

    return fig


def create_icicle_figure(ice_df: pd.DataFrame, title: str) -> go.Figure:
    """
    Create Plotly icicle figure from prepared DataFrame.

    This function delegates to
    visualization.plotly_generator.create_icicle_figure() for the actual
    figure generation.

    Args:
        ice_df: DataFrame with parents, ids, labels, value, colour etc.
        title: Chart title

    Returns:
        Plotly Figure object
    """
    return _create_icicle_figure(ice_df, title)