refactor: slim pathways.db from 351 MB to 3.5 MB by removing unused tables
Drop fact_interventions (440K rows), mv_patient_treatment_summary (35K rows), ref_drug_snomed_mapping (144K rows), and processed_files — all unused since the app moved to pre-computed pathway_nodes.

Key changes:
- Rewrite load_data() to source from pathway_nodes + pathway_refresh_log
- Remove 7 dead methods and 8 dead state vars from pathways_app.py
- Delete patient_data.py, load_snomed_mapping.py, test_large_dataset_performance.py
- Remove SQLiteDataLoader (depended on fact_interventions)
- Remove file tracking schema (processed_files tracked fact_interventions loads)
- Remove legacy diagnosis functions from diagnosis_lookup.py
- Add source_row_count migration for pathway_refresh_log
- Clean all cross-references in __init__.py, data_source.py, migrate.py
This commit is contained in:
@@ -176,6 +176,7 @@ def log_refresh_complete(
|
||||
record_count: int,
|
||||
date_filter_counts: dict[str, int],
|
||||
duration_seconds: float,
|
||||
source_row_count: Optional[int] = None,
|
||||
) -> None:
|
||||
"""Log the successful completion of a refresh operation."""
|
||||
conn.execute("""
|
||||
@@ -184,13 +185,15 @@ def log_refresh_complete(
|
||||
status = 'completed',
|
||||
record_count = ?,
|
||||
date_filter_counts = ?,
|
||||
processing_duration_seconds = ?
|
||||
processing_duration_seconds = ?,
|
||||
source_row_count = ?
|
||||
WHERE refresh_id = ?
|
||||
""", (
|
||||
datetime.now().isoformat(),
|
||||
record_count,
|
||||
json.dumps(date_filter_counts),
|
||||
duration_seconds,
|
||||
source_row_count,
|
||||
refresh_id,
|
||||
))
|
||||
conn.commit()
|
||||
@@ -517,6 +520,7 @@ def refresh_pathways(
|
||||
record_count=stats["total_records"],
|
||||
date_filter_counts=stats["date_filter_counts"],
|
||||
duration_seconds=elapsed,
|
||||
source_row_count=stats.get("snowflake_rows"),
|
||||
)
|
||||
|
||||
# Verify final counts
|
||||
|
||||
Reference in New Issue
Block a user