Files
HighCostDrugsDemo/data_processing/__init__.py
T
2026-02-04 13:04:29 +00:00

274 lines
7.3 KiB
Python

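"""
Data processing module for NHS High-Cost Drug Patient Pathway Analysis Tool.

Contains SQLite database management, data loaders, and Snowflake integration.
Handles the migration from CSV-based storage to SQLite for improved performance.

Submodules:
    database: SQLite connection management and schema definitions
    schema: Table schema constants and create/drop/count/verify helpers
    reference_data: Reference data migration functions
    loader: Data loading abstractions (CSV, SQLite, Snowflake)
    patient_data: Patient data migration and materialized view refresh functions
    snowflake_connector: Snowflake integration with SSO authentication
    cache: Query result caching
    data_source: Data source management with fallback chain
    diagnosis_lookup: Diagnosis lookup (GP diagnosis validation)
"""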
from data_processing.database import (
DatabaseConfig,
DatabaseManager,
default_db_config,
default_db_manager,
)
from data_processing.schema import (
# Reference table schemas
REF_DRUG_NAMES_SCHEMA,
REF_ORGANIZATIONS_SCHEMA,
REF_DIRECTORIES_SCHEMA,
REF_DRUG_DIRECTORY_MAP_SCHEMA,
REF_DRUG_INDICATION_CLUSTERS_SCHEMA,
REFERENCE_TABLES_SCHEMA,
# Fact table schemas
FACT_INTERVENTIONS_SCHEMA,
FACT_TABLES_SCHEMA,
# Materialized view schemas
MV_PATIENT_TREATMENT_SUMMARY_SCHEMA,
MATERIALIZED_VIEWS_SCHEMA,
# File tracking schemas
PROCESSED_FILES_SCHEMA,
FILE_TRACKING_SCHEMA,
# Combined schema
ALL_TABLES_SCHEMA,
# Reference table functions
create_reference_tables,
drop_reference_tables,
get_reference_table_counts,
verify_reference_tables_exist,
# Fact table functions
create_fact_tables,
drop_fact_tables,
get_fact_table_counts,
verify_fact_tables_exist,
# File tracking functions
create_file_tracking_tables,
drop_file_tracking_tables,
get_file_tracking_counts,
verify_file_tracking_tables_exist,
# Combined functions
create_all_tables,
drop_all_tables,
get_all_table_counts,
verify_all_tables_exist,
)
# Reference data migration functions
from data_processing.reference_data import (
MigrationResult,
migrate_drug_names,
get_drug_name_counts,
verify_drug_names_migration,
migrate_organizations,
get_organization_counts,
verify_organizations_migration,
migrate_directories,
get_directory_counts,
verify_directories_migration,
migrate_drug_directory_map,
get_drug_directory_map_counts,
verify_drug_directory_map_migration,
migrate_drug_indication_clusters,
get_drug_indication_cluster_counts,
verify_drug_indication_clusters_migration,
)
# Data loader abstractions
from data_processing.loader import (
DataLoader,
FileDataLoader,
SQLiteDataLoader,
LoadResult,
get_loader,
REQUIRED_COLUMNS,
OPTIONAL_COLUMNS,
)
# Patient data migration functions
from data_processing.patient_data import (
PatientDataLoadResult,
load_patient_data,
get_patient_data_stats,
list_processed_files,
calculate_file_hash,
# Materialized view functions
MVRefreshResult,
refresh_patient_treatment_summary,
get_patient_summary_stats,
verify_mv_consistency,
)
# Snowflake connector
from data_processing.snowflake_connector import (
SnowflakeConnector,
SnowflakeConnectionError,
SnowflakeNotConfiguredError,
SnowflakeNotAvailableError,
ConnectionInfo,
get_connector,
reset_connector,
is_snowflake_available,
is_snowflake_configured,
SNOWFLAKE_AVAILABLE,
)
# Query result caching
from data_processing.cache import (
QueryCache,
CacheEntry,
CacheStats,
get_cache,
reset_cache,
is_cache_enabled,
)
# Data source management with fallback chain
from data_processing.data_source import (
DataSourceType,
DataSourceResult,
SourceStatus,
DataSourceManager,
get_data_source_manager,
get_data,
reset_data_source_manager,
)
# Diagnosis lookup (GP diagnosis validation)
from data_processing.diagnosis_lookup import (
ClusterSnomedCodes,
IndicationValidationResult,
DrugIndicationMatchRate,
get_drug_clusters,
get_drug_cluster_ids,
get_cluster_snomed_codes,
patient_has_indication,
validate_indication,
get_indication_match_rate,
batch_validate_indications,
get_available_clusters,
)
# Public API of the package: the names bound by `from data_processing import *`.
# Kept as a plain literal list, grouped by the submodule each name is imported
# from, in the same order as the import statements earlier in this file.
__all__ = [
    # --- data_processing.database: configuration and manager objects ---
    "DatabaseConfig",
    "DatabaseManager",
    "default_db_config",
    "default_db_manager",
    # --- data_processing.schema: reference table schema constants ---
    "REF_DRUG_NAMES_SCHEMA",
    "REF_ORGANIZATIONS_SCHEMA",
    "REF_DIRECTORIES_SCHEMA",
    "REF_DRUG_DIRECTORY_MAP_SCHEMA",
    "REF_DRUG_INDICATION_CLUSTERS_SCHEMA",
    "REFERENCE_TABLES_SCHEMA",
    # --- data_processing.schema: fact table schema constants ---
    "FACT_INTERVENTIONS_SCHEMA",
    "FACT_TABLES_SCHEMA",
    # --- data_processing.schema: materialized view schema constants ---
    "MV_PATIENT_TREATMENT_SUMMARY_SCHEMA",
    "MATERIALIZED_VIEWS_SCHEMA",
    # --- data_processing.schema: file tracking schema constants ---
    "PROCESSED_FILES_SCHEMA",
    "FILE_TRACKING_SCHEMA",
    # --- data_processing.schema: combined schema constant ---
    "ALL_TABLES_SCHEMA",
    # --- data_processing.schema: reference table helpers ---
    "create_reference_tables",
    "drop_reference_tables",
    "get_reference_table_counts",
    "verify_reference_tables_exist",
    # --- data_processing.schema: fact table helpers ---
    "create_fact_tables",
    "drop_fact_tables",
    "get_fact_table_counts",
    "verify_fact_tables_exist",
    # --- data_processing.schema: file tracking helpers ---
    "create_file_tracking_tables",
    "drop_file_tracking_tables",
    "get_file_tracking_counts",
    "verify_file_tracking_tables_exist",
    # --- data_processing.schema: helpers spanning all tables ---
    "create_all_tables",
    "drop_all_tables",
    "get_all_table_counts",
    "verify_all_tables_exist",
    # --- data_processing.reference_data: migrate / count / verify per table ---
    "MigrationResult",
    "migrate_drug_names",
    "get_drug_name_counts",
    "verify_drug_names_migration",
    "migrate_organizations",
    "get_organization_counts",
    "verify_organizations_migration",
    "migrate_directories",
    "get_directory_counts",
    "verify_directories_migration",
    "migrate_drug_directory_map",
    "get_drug_directory_map_counts",
    "verify_drug_directory_map_migration",
    "migrate_drug_indication_clusters",
    "get_drug_indication_cluster_counts",
    "verify_drug_indication_clusters_migration",
    # --- data_processing.loader: loader classes, factory, column constants ---
    "DataLoader",
    "FileDataLoader",
    "SQLiteDataLoader",
    "LoadResult",
    "get_loader",
    "REQUIRED_COLUMNS",
    "OPTIONAL_COLUMNS",
    # --- data_processing.patient_data: patient data loading ---
    "PatientDataLoadResult",
    "load_patient_data",
    "get_patient_data_stats",
    "list_processed_files",
    "calculate_file_hash",
    # --- data_processing.patient_data: materialized view refresh ---
    "MVRefreshResult",
    "refresh_patient_treatment_summary",
    "get_patient_summary_stats",
    "verify_mv_consistency",
    # --- data_processing.snowflake_connector ---
    "SnowflakeConnector",
    "SnowflakeConnectionError",
    "SnowflakeNotConfiguredError",
    "SnowflakeNotAvailableError",
    "ConnectionInfo",
    "get_connector",
    "reset_connector",
    "is_snowflake_available",
    "is_snowflake_configured",
    "SNOWFLAKE_AVAILABLE",
    # --- data_processing.cache: query result cache ---
    "QueryCache",
    "CacheEntry",
    "CacheStats",
    "get_cache",
    "reset_cache",
    "is_cache_enabled",
    # --- data_processing.data_source: source manager with fallback chain ---
    "DataSourceType",
    "DataSourceResult",
    "SourceStatus",
    "DataSourceManager",
    "get_data_source_manager",
    "get_data",
    "reset_data_source_manager",
    # --- data_processing.diagnosis_lookup: GP diagnosis validation ---
    "ClusterSnomedCodes",
    "IndicationValidationResult",
    "DrugIndicationMatchRate",
    "get_drug_clusters",
    "get_drug_cluster_ids",
    "get_cluster_snomed_codes",
    "patient_has_indication",
    "validate_indication",
    "get_indication_match_rate",
    "batch_validate_indications",
    "get_available_clusters",
]