def load_data(self):
    """
    Load dropdown options, date defaults and KPI totals from SQLite.

    Registered as the page ``on_load`` handler, so it may run more than once
    per session. Every failure path therefore resets ``data_loaded`` to
    ``False`` so that a stale success flag from an earlier run cannot survive.

    On success this sets:
        - ``total_records``, ``unique_patients``, ``total_drugs``, ``total_cost``
        - ``available_drugs``, ``available_directorates``, ``available_indications``
        - ``latest_date_in_data``, ``last_seen_to_date``, ``initiated_to_date``
          (latest intervention date) and ``last_seen_from_date`` (180 days
          earlier), when the table contains at least one dated row
        - ``data_loaded = True``, ``last_updated`` (ISO timestamp) and an empty
          ``error_message``

    On failure ``error_message`` describes the problem and ``data_loaded`` is
    ``False``. No exception propagates to the caller.
    """
    # Imported locally, as in the original implementation.
    import sqlite3
    from contextlib import closing

    # NOTE: relative path, resolved against the process working directory.
    db_path = Path("data/pathways.db")

    if not db_path.exists():
        self.error_message = "Database not found. Please run data migration first."
        self.data_loaded = False
        return

    def fetch_column(cursor, query):
        """Run *query* and return the first column of every row as a list."""
        cursor.execute(query)
        return [row[0] for row in cursor.fetchall()]

    try:
        # ``closing`` guarantees the connection is released even when a query
        # raises; sqlite3's own context manager only commits/rolls back.
        with closing(sqlite3.connect(str(db_path))) as conn:
            cursor = conn.cursor()

            # Total number of intervention rows.
            cursor.execute("SELECT COUNT(*) FROM fact_interventions")
            self.total_records = cursor.fetchone()[0]

            if self.total_records == 0:
                self.error_message = "No data in database. Please run data migration."
                self.data_loaded = False
                return

            # Available drugs (distinct, sorted).
            self.available_drugs = fetch_column(cursor, """
                SELECT DISTINCT drug_name_std
                FROM fact_interventions
                WHERE drug_name_std IS NOT NULL AND drug_name_std != ''
                ORDER BY drug_name_std
            """)

            # Available directorates (distinct, sorted); the column is
            # named ``directory`` in the fact table.
            self.available_directorates = fetch_column(cursor, """
                SELECT DISTINCT directory
                FROM fact_interventions
                WHERE directory IS NOT NULL AND directory != ''
                ORDER BY directory
            """)

            # Available indications come from the reference table.
            indications = fetch_column(cursor, """
                SELECT DISTINCT indication
                FROM ref_drug_indication_clusters
                WHERE indication IS NOT NULL AND indication != ''
                ORDER BY indication
            """)
            # Keep the dropdown non-empty when the reference table is empty.
            self.available_indications = indications or ["(No indications available)"]

            # Latest intervention date drives the "to" date defaults.
            cursor.execute("SELECT MAX(intervention_date) FROM fact_interventions")
            max_date = cursor.fetchone()[0]

            if max_date:
                # SQLite has no native date type, so the stored text may
                # carry a time component (e.g. "2025-02-28 00:00:00").
                # Normalise to YYYY-MM-DD before using it as a default.
                max_dt = datetime.fromisoformat(max_date)
                latest_date = max_dt.strftime("%Y-%m-%d")

                self.latest_date_in_data = latest_date
                self.last_seen_to_date = latest_date
                self.initiated_to_date = latest_date

                # "Six months" is approximated as 180 days.
                six_months_ago = max_dt - timedelta(days=180)
                self.last_seen_from_date = six_months_ago.strftime("%Y-%m-%d")

            # Unique patient count for KPIs.
            cursor.execute("SELECT COUNT(DISTINCT upid) FROM fact_interventions")
            self.unique_patients = cursor.fetchone()[0]

            # Unique drug count mirrors the dropdown contents.
            self.total_drugs = len(self.available_drugs)

            # SUM() yields NULL when every price is NULL; report that as 0.0.
            cursor.execute("SELECT SUM(price_actual) FROM fact_interventions")
            total_cost_result = cursor.fetchone()[0]
            self.total_cost = float(total_cost_result) if total_cost_result else 0.0

    except sqlite3.Error as e:
        self.error_message = f"Database error: {str(e)}"
        self.data_loaded = False
    except Exception as e:
        # Deliberately broad: this is a UI boundary, so any unexpected
        # failure is surfaced as a message instead of crashing the handler.
        self.error_message = f"Failed to load data: {str(e)}"
        self.data_loaded = False
    else:
        self.data_loaded = True
        self.last_updated = datetime.now().isoformat()
        self.error_message = ""
- `pathways_app/pathways_app.py` — existing load patterns (search for "def load_") ### Blocked items: - None + +## Iteration 8 - 2026-02-04 +### Task: 3.2 Data Loading +### Why this task: +- Previous iteration (7) explicitly recommended continuing with Task 3.2 +- Natural progression through Phase 3: State Management +- Data loading is foundational for filter logic (Task 3.3) and KPI calculations (Task 3.4) +- No dependencies blocking this task +### Status: COMPLETE +### What was done: +- Added `Path` import from pathlib to app_v2.py +- Created comprehensive `load_data()` method in AppState that: + - Connects to SQLite database (data/pathways.db) + - Loads available_drugs from DISTINCT drug_name_std in fact_interventions (552 unique drugs) + - Loads available_directorates from DISTINCT directory in fact_interventions (29 unique) + - Loads available_indications from ref_drug_indication_clusters table (32 unique) + - Detects date range (2019-04-01 to 2025-02-28) and sets last_seen_to_date to max date + - Sets last_seen_from_date to 6 months before max date + - Populates KPI values: total_records (440,069), unique_patients, total_drugs, total_cost + - Sets data_loaded=True and last_updated timestamp on success + - Handles errors gracefully with meaningful error messages +- Added on_load handler to app.add_page() to trigger load_data on page load +### Validation results: +- Tier 1 (Code): + - `python -m py_compile pathways_app/app_v2.py` PASSED + - `python -c "from pathways_app.app_v2 import app, AppState"` PASSED + - AppState.load_data method exists and is callable + - Database queries tested independently — all return expected data +- Tier 2 (Visual): Deferred - requires running reflex with modified rxconfig +- Tier 3 (Functional): Database queries verified — 552 drugs, 29 directories, 32 indications, 440K records +### Files changed: +- Modified: pathways_app/app_v2.py (+80 lines - load_data method, on_load handler, Path import) +- Modified: IMPLEMENTATION_PLAN.md 
(marked 3.2 subtasks complete) +### Committed: [pending] +### Patterns discovered: +- Reflex on_load: Use `app.add_page(..., on_load=AppState.method_name)` to trigger method on page load +- SQLite in Reflex state: Import sqlite3 inside method to avoid issues with state serialization +- Date handling: Parse SQLite dates with datetime.strptime(date_str, "%Y-%m-%d") +- Reference tables: ref_drug_indication_clusters has 32 unique indications for dropdown +- Path handling: Use `Path("data/pathways.db")` for cross-platform compatibility +### Next iteration should: +- Continue with Task 3.3: Filter Logic +- Create `apply_filters()` computed method that filters data based on current filter state +- Handle initiated date filter (when enabled) +- Handle last seen date filter (when enabled) +- Handle drug/indication/directorate multi-select filters +- Return filtered data for chart generation +- Consider implementing as @rx.var computed property that returns filtered record count +- May need to store raw_data list in state or re-query SQLite based on filters +### Blocked items: +- None