refactor: slim pathways.db from 351 MB to 3.5 MB by removing unused tables

Drop fact_interventions (440K rows), mv_patient_treatment_summary (35K rows),
ref_drug_snomed_mapping (144K rows), and processed_files — all unused since
the app moved to pre-computed pathway_nodes.

Key changes:
- Rewrite load_data() to source from pathway_nodes + pathway_refresh_log
- Remove 7 dead methods and 8 dead state vars from pathways_app.py
- Delete patient_data.py, load_snomed_mapping.py, test_large_dataset_performance.py
- Remove SQLiteDataLoader (depended on fact_interventions)
- Remove file tracking schema (processed_files tracked loads into fact_interventions)
- Remove legacy diagnosis functions from diagnosis_lookup.py
- Add source_row_count migration for pathway_refresh_log
- Clean up all cross-references to the removed code in __init__.py, data_source.py, and migrate.py
This commit is contained in:
Andrew Charlwood
2026-02-06 08:51:03 +00:00
parent bb93c1673e
commit 778ed99ef6
11 changed files with 95 additions and 3653 deletions
-65
View File
@@ -24,15 +24,6 @@ from data_processing.schema import (
REF_DRUG_DIRECTORY_MAP_SCHEMA,
REF_DRUG_INDICATION_CLUSTERS_SCHEMA,
REFERENCE_TABLES_SCHEMA,
# Fact table schemas
FACT_INTERVENTIONS_SCHEMA,
FACT_TABLES_SCHEMA,
# Materialized view schemas
MV_PATIENT_TREATMENT_SUMMARY_SCHEMA,
MATERIALIZED_VIEWS_SCHEMA,
# File tracking schemas
PROCESSED_FILES_SCHEMA,
FILE_TRACKING_SCHEMA,
# Combined schema
ALL_TABLES_SCHEMA,
# Reference table functions
@@ -40,16 +31,6 @@ from data_processing.schema import (
drop_reference_tables,
get_reference_table_counts,
verify_reference_tables_exist,
# Fact table functions
create_fact_tables,
drop_fact_tables,
get_fact_table_counts,
verify_fact_tables_exist,
# File tracking functions
create_file_tracking_tables,
drop_file_tracking_tables,
get_file_tracking_counts,
verify_file_tracking_tables_exist,
# Combined functions
create_all_tables,
drop_all_tables,
@@ -81,27 +62,12 @@ from data_processing.reference_data import (
from data_processing.loader import (
DataLoader,
FileDataLoader,
SQLiteDataLoader,
LoadResult,
get_loader,
REQUIRED_COLUMNS,
OPTIONAL_COLUMNS,
)
# Patient data migration functions
from data_processing.patient_data import (
PatientDataLoadResult,
load_patient_data,
get_patient_data_stats,
list_processed_files,
calculate_file_hash,
# Materialized view functions
MVRefreshResult,
refresh_patient_treatment_summary,
get_patient_summary_stats,
verify_mv_consistency,
)
# Snowflake connector
from data_processing.snowflake_connector import (
SnowflakeConnector,
@@ -165,15 +131,6 @@ __all__ = [
"REF_DRUG_DIRECTORY_MAP_SCHEMA",
"REF_DRUG_INDICATION_CLUSTERS_SCHEMA",
"REFERENCE_TABLES_SCHEMA",
# Fact table schemas
"FACT_INTERVENTIONS_SCHEMA",
"FACT_TABLES_SCHEMA",
# Materialized view schemas
"MV_PATIENT_TREATMENT_SUMMARY_SCHEMA",
"MATERIALIZED_VIEWS_SCHEMA",
# File tracking schemas
"PROCESSED_FILES_SCHEMA",
"FILE_TRACKING_SCHEMA",
# Combined schema
"ALL_TABLES_SCHEMA",
# Reference table functions
@@ -181,16 +138,6 @@ __all__ = [
"drop_reference_tables",
"get_reference_table_counts",
"verify_reference_tables_exist",
# Fact table functions
"create_fact_tables",
"drop_fact_tables",
"get_fact_table_counts",
"verify_fact_tables_exist",
# File tracking functions
"create_file_tracking_tables",
"drop_file_tracking_tables",
"get_file_tracking_counts",
"verify_file_tracking_tables_exist",
# Combined functions
"create_all_tables",
"drop_all_tables",
@@ -216,22 +163,10 @@ __all__ = [
# Data loader abstractions
"DataLoader",
"FileDataLoader",
"SQLiteDataLoader",
"LoadResult",
"get_loader",
"REQUIRED_COLUMNS",
"OPTIONAL_COLUMNS",
# Patient data migration
"PatientDataLoadResult",
"load_patient_data",
"get_patient_data_stats",
"list_processed_files",
"calculate_file_hash",
# Materialized view functions
"MVRefreshResult",
"refresh_patient_treatment_summary",
"get_patient_summary_stats",
"verify_mv_consistency",
# Snowflake connector
"SnowflakeConnector",
"SnowflakeConnectionError",