274 lines
7.3 KiB
Python
274 lines
7.3 KiB
Python
"""
|
|
Data processing module for NHS High-Cost Drug Patient Pathway Analysis Tool.
|
|
|
|
Contains SQLite database management, data loaders, and Snowflake integration.
|
|
Handles the migration from CSV-based storage to SQLite for improved performance.
|
|
|
|
Submodules:
|
|
database: SQLite connection management and schema definitions
|
|
loader: Data loading abstractions (CSV, SQLite, Snowflake)
|
|
snowflake_connector: Snowflake integration with SSO authentication
|
|
"""
|
|
|
|
from data_processing.database import (
|
|
DatabaseConfig,
|
|
DatabaseManager,
|
|
default_db_config,
|
|
default_db_manager,
|
|
)
|
|
from data_processing.schema import (
|
|
# Reference table schemas
|
|
REF_DRUG_NAMES_SCHEMA,
|
|
REF_ORGANIZATIONS_SCHEMA,
|
|
REF_DIRECTORIES_SCHEMA,
|
|
REF_DRUG_DIRECTORY_MAP_SCHEMA,
|
|
REF_DRUG_INDICATION_CLUSTERS_SCHEMA,
|
|
REFERENCE_TABLES_SCHEMA,
|
|
# Fact table schemas
|
|
FACT_INTERVENTIONS_SCHEMA,
|
|
FACT_TABLES_SCHEMA,
|
|
# Materialized view schemas
|
|
MV_PATIENT_TREATMENT_SUMMARY_SCHEMA,
|
|
MATERIALIZED_VIEWS_SCHEMA,
|
|
# File tracking schemas
|
|
PROCESSED_FILES_SCHEMA,
|
|
FILE_TRACKING_SCHEMA,
|
|
# Combined schema
|
|
ALL_TABLES_SCHEMA,
|
|
# Reference table functions
|
|
create_reference_tables,
|
|
drop_reference_tables,
|
|
get_reference_table_counts,
|
|
verify_reference_tables_exist,
|
|
# Fact table functions
|
|
create_fact_tables,
|
|
drop_fact_tables,
|
|
get_fact_table_counts,
|
|
verify_fact_tables_exist,
|
|
# File tracking functions
|
|
create_file_tracking_tables,
|
|
drop_file_tracking_tables,
|
|
get_file_tracking_counts,
|
|
verify_file_tracking_tables_exist,
|
|
# Combined functions
|
|
create_all_tables,
|
|
drop_all_tables,
|
|
get_all_table_counts,
|
|
verify_all_tables_exist,
|
|
)
|
|
|
|
# Reference data migration functions
|
|
from data_processing.reference_data import (
|
|
MigrationResult,
|
|
migrate_drug_names,
|
|
get_drug_name_counts,
|
|
verify_drug_names_migration,
|
|
migrate_organizations,
|
|
get_organization_counts,
|
|
verify_organizations_migration,
|
|
migrate_directories,
|
|
get_directory_counts,
|
|
verify_directories_migration,
|
|
migrate_drug_directory_map,
|
|
get_drug_directory_map_counts,
|
|
verify_drug_directory_map_migration,
|
|
migrate_drug_indication_clusters,
|
|
get_drug_indication_cluster_counts,
|
|
verify_drug_indication_clusters_migration,
|
|
)
|
|
|
|
# Data loader abstractions
|
|
from data_processing.loader import (
|
|
DataLoader,
|
|
FileDataLoader,
|
|
SQLiteDataLoader,
|
|
LoadResult,
|
|
get_loader,
|
|
REQUIRED_COLUMNS,
|
|
OPTIONAL_COLUMNS,
|
|
)
|
|
|
|
# Patient data migration functions
|
|
from data_processing.patient_data import (
|
|
PatientDataLoadResult,
|
|
load_patient_data,
|
|
get_patient_data_stats,
|
|
list_processed_files,
|
|
calculate_file_hash,
|
|
# Materialized view functions
|
|
MVRefreshResult,
|
|
refresh_patient_treatment_summary,
|
|
get_patient_summary_stats,
|
|
verify_mv_consistency,
|
|
)
|
|
|
|
# Snowflake connector
|
|
from data_processing.snowflake_connector import (
|
|
SnowflakeConnector,
|
|
SnowflakeConnectionError,
|
|
SnowflakeNotConfiguredError,
|
|
SnowflakeNotAvailableError,
|
|
ConnectionInfo,
|
|
get_connector,
|
|
reset_connector,
|
|
is_snowflake_available,
|
|
is_snowflake_configured,
|
|
SNOWFLAKE_AVAILABLE,
|
|
)
|
|
|
|
# Query result caching
|
|
from data_processing.cache import (
|
|
QueryCache,
|
|
CacheEntry,
|
|
CacheStats,
|
|
get_cache,
|
|
reset_cache,
|
|
is_cache_enabled,
|
|
)
|
|
|
|
# Data source management with fallback chain
|
|
from data_processing.data_source import (
|
|
DataSourceType,
|
|
DataSourceResult,
|
|
SourceStatus,
|
|
DataSourceManager,
|
|
get_data_source_manager,
|
|
get_data,
|
|
reset_data_source_manager,
|
|
)
|
|
|
|
# Diagnosis lookup (GP diagnosis validation)
|
|
from data_processing.diagnosis_lookup import (
|
|
ClusterSnomedCodes,
|
|
IndicationValidationResult,
|
|
DrugIndicationMatchRate,
|
|
get_drug_clusters,
|
|
get_drug_cluster_ids,
|
|
get_cluster_snomed_codes,
|
|
patient_has_indication,
|
|
validate_indication,
|
|
get_indication_match_rate,
|
|
batch_validate_indications,
|
|
get_available_clusters,
|
|
)
|
|
|
|
# Public API of the package: every name re-exported by the imports above,
# listed in import order and grouped by the submodule it comes from.
__all__ = [
    # --- data_processing.database: database management ---
    "DatabaseConfig",
    "DatabaseManager",
    "default_db_config",
    "default_db_manager",
    # --- data_processing.schema: reference table schemas ---
    "REF_DRUG_NAMES_SCHEMA",
    "REF_ORGANIZATIONS_SCHEMA",
    "REF_DIRECTORIES_SCHEMA",
    "REF_DRUG_DIRECTORY_MAP_SCHEMA",
    "REF_DRUG_INDICATION_CLUSTERS_SCHEMA",
    "REFERENCE_TABLES_SCHEMA",
    # --- data_processing.schema: fact table schemas ---
    "FACT_INTERVENTIONS_SCHEMA",
    "FACT_TABLES_SCHEMA",
    # --- data_processing.schema: materialized view schemas ---
    "MV_PATIENT_TREATMENT_SUMMARY_SCHEMA",
    "MATERIALIZED_VIEWS_SCHEMA",
    # --- data_processing.schema: file tracking schemas ---
    "PROCESSED_FILES_SCHEMA",
    "FILE_TRACKING_SCHEMA",
    # --- data_processing.schema: combined schema ---
    "ALL_TABLES_SCHEMA",
    # --- data_processing.schema: reference table functions ---
    "create_reference_tables",
    "drop_reference_tables",
    "get_reference_table_counts",
    "verify_reference_tables_exist",
    # --- data_processing.schema: fact table functions ---
    "create_fact_tables",
    "drop_fact_tables",
    "get_fact_table_counts",
    "verify_fact_tables_exist",
    # --- data_processing.schema: file tracking functions ---
    "create_file_tracking_tables",
    "drop_file_tracking_tables",
    "get_file_tracking_counts",
    "verify_file_tracking_tables_exist",
    # --- data_processing.schema: combined functions ---
    "create_all_tables",
    "drop_all_tables",
    "get_all_table_counts",
    "verify_all_tables_exist",
    # --- data_processing.reference_data: reference data migration ---
    "MigrationResult",
    "migrate_drug_names",
    "get_drug_name_counts",
    "verify_drug_names_migration",
    "migrate_organizations",
    "get_organization_counts",
    "verify_organizations_migration",
    "migrate_directories",
    "get_directory_counts",
    "verify_directories_migration",
    "migrate_drug_directory_map",
    "get_drug_directory_map_counts",
    "verify_drug_directory_map_migration",
    "migrate_drug_indication_clusters",
    "get_drug_indication_cluster_counts",
    "verify_drug_indication_clusters_migration",
    # --- data_processing.loader: data loader abstractions ---
    "DataLoader",
    "FileDataLoader",
    "SQLiteDataLoader",
    "LoadResult",
    "get_loader",
    "REQUIRED_COLUMNS",
    "OPTIONAL_COLUMNS",
    # --- data_processing.patient_data: patient data migration ---
    "PatientDataLoadResult",
    "load_patient_data",
    "get_patient_data_stats",
    "list_processed_files",
    "calculate_file_hash",
    # --- data_processing.patient_data: materialized view functions ---
    "MVRefreshResult",
    "refresh_patient_treatment_summary",
    "get_patient_summary_stats",
    "verify_mv_consistency",
    # --- data_processing.snowflake_connector: Snowflake connector ---
    "SnowflakeConnector",
    "SnowflakeConnectionError",
    "SnowflakeNotConfiguredError",
    "SnowflakeNotAvailableError",
    "ConnectionInfo",
    "get_connector",
    "reset_connector",
    "is_snowflake_available",
    "is_snowflake_configured",
    "SNOWFLAKE_AVAILABLE",
    # --- data_processing.cache: query result caching ---
    "QueryCache",
    "CacheEntry",
    "CacheStats",
    "get_cache",
    "reset_cache",
    "is_cache_enabled",
    # --- data_processing.data_source: data source management (fallback chain) ---
    "DataSourceType",
    "DataSourceResult",
    "SourceStatus",
    "DataSourceManager",
    "get_data_source_manager",
    "get_data",
    "reset_data_source_manager",
    # --- data_processing.diagnosis_lookup: diagnosis lookup ---
    "ClusterSnomedCodes",
    "IndicationValidationResult",
    "DrugIndicationMatchRate",
    "get_drug_clusters",
    "get_drug_cluster_ids",
    "get_cluster_snomed_codes",
    "patient_has_indication",
    "validate_indication",
    "get_indication_match_rate",
    "batch_validate_indications",
    "get_available_clusters",
]
|