refactor: slim pathways.db from 351 MB to 3.5 MB by removing unused tables
Drop fact_interventions (440K rows), mv_patient_treatment_summary (35K rows), ref_drug_snomed_mapping (144K rows), and processed_files — all unused since the app moved to pre-computed pathway_nodes. Key changes: - Rewrite load_data() to source from pathway_nodes + pathway_refresh_log - Remove 7 dead methods and 8 dead state vars from pathways_app.py - Delete patient_data.py, load_snomed_mapping.py, test_large_dataset_performance.py - Remove SQLiteDataLoader (depended on fact_interventions) - Remove file tracking schema (processed_files tracked fact_interventions loads) - Remove legacy diagnosis functions from diagnosis_lookup.py - Add source_row_count migration for pathway_refresh_log - Clean all cross-references in __init__.py, data_source.py, migrate.py
This commit is contained in:
+27
-438
@@ -115,43 +115,6 @@ CREATE INDEX IF NOT EXISTS idx_ref_drug_indication_clusters_cluster ON ref_drug_
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_indication_clusters_indication ON ref_drug_indication_clusters(indication);
|
||||
"""
|
||||
|
||||
REF_DRUG_SNOMED_MAPPING_SCHEMA = """
|
||||
-- Direct SNOMED code mapping from drug to indication to GP diagnosis codes
|
||||
-- Source: data/drug_snomed_mapping_enriched.csv (163K rows)
|
||||
-- Used for direct GP record matching to assign diagnosis-based directorates
|
||||
-- and to support indication-based pathway hierarchy (Trust → Search_Term → Drug → Pathway)
|
||||
CREATE TABLE IF NOT EXISTS ref_drug_snomed_mapping (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
drug_name TEXT NOT NULL, -- Original drug name from mapping
|
||||
indication TEXT NOT NULL, -- Specific indication (603 unique values)
|
||||
ta_id TEXT, -- NICE TA reference (e.g., TA568)
|
||||
search_term TEXT NOT NULL, -- Simplified grouping (187 unique values)
|
||||
snomed_code TEXT NOT NULL, -- SNOMED CT code for GP record matching
|
||||
snomed_description TEXT, -- SNOMED code description
|
||||
cleaned_drug_name TEXT NOT NULL, -- Standardized drug name for matching
|
||||
primary_directorate TEXT, -- Primary directorate for this indication
|
||||
all_directorates TEXT, -- Pipe-separated list of valid directorates
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(drug_name, indication, snomed_code)
|
||||
);
|
||||
|
||||
-- Index for looking up SNOMED codes by drug name (most common access pattern)
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_snomed_mapping_drug ON ref_drug_snomed_mapping(drug_name);
|
||||
|
||||
-- Index for looking up by cleaned drug name (standardized matching)
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_snomed_mapping_cleaned ON ref_drug_snomed_mapping(cleaned_drug_name);
|
||||
|
||||
-- Index for looking up by SNOMED code (reverse lookup from GP record)
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_snomed_mapping_snomed ON ref_drug_snomed_mapping(snomed_code);
|
||||
|
||||
-- Index for grouping by search_term (indication-based hierarchy)
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_snomed_mapping_search_term ON ref_drug_snomed_mapping(search_term);
|
||||
|
||||
-- Composite index for drug + snomed code (common lookup pattern)
|
||||
CREATE INDEX IF NOT EXISTS idx_ref_drug_snomed_mapping_drug_snomed
|
||||
ON ref_drug_snomed_mapping(cleaned_drug_name, snomed_code);
|
||||
"""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pathway Data Architecture Schemas
|
||||
@@ -278,6 +241,7 @@ CREATE TABLE IF NOT EXISTS pathway_refresh_log (
|
||||
snowflake_query_date_from TEXT, -- Start date of Snowflake query
|
||||
snowflake_query_date_to TEXT, -- End date of Snowflake query
|
||||
processing_duration_seconds REAL, -- How long the refresh took
|
||||
source_row_count INTEGER, -- Number of Snowflake rows fetched
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
@@ -301,208 +265,6 @@ PATHWAY_TABLES_SCHEMA = f"""
|
||||
"""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fact Table Schemas
|
||||
# =============================================================================
|
||||
|
||||
FACT_INTERVENTIONS_SCHEMA = """
|
||||
-- Patient intervention records (fact table)
|
||||
-- Source: HCD activity data (CSV/Parquet files or Snowflake)
|
||||
-- This is the main fact table storing all patient intervention events
|
||||
CREATE TABLE IF NOT EXISTS fact_interventions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
||||
-- Patient identification
|
||||
upid TEXT NOT NULL, -- Unique Patient ID (Provider Code[:3] + PersonKey)
|
||||
provider_code TEXT NOT NULL, -- Original provider code (3-5 chars)
|
||||
person_key TEXT NOT NULL, -- Patient key from source system
|
||||
|
||||
-- Intervention details
|
||||
drug_name_raw TEXT, -- Original drug name from source
|
||||
drug_name_std TEXT NOT NULL, -- Standardized drug name (via ref_drug_names)
|
||||
intervention_date DATE NOT NULL, -- Date of intervention
|
||||
price_actual REAL NOT NULL DEFAULT 0, -- Cost of intervention in GBP
|
||||
|
||||
-- Organization and directory
|
||||
org_name TEXT, -- Organization name (cleaned, no commas)
|
||||
directory TEXT, -- Medical directory/specialty (may be "Undefined")
|
||||
|
||||
-- Source tracking
|
||||
source_file TEXT, -- Original file this record came from
|
||||
loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
-- Additional clinical fields (optional, used in directory fallback logic)
|
||||
treatment_function_code INTEGER,
|
||||
additional_detail_1 TEXT,
|
||||
additional_detail_2 TEXT,
|
||||
additional_detail_3 TEXT,
|
||||
additional_detail_4 TEXT,
|
||||
additional_detail_5 TEXT
|
||||
);
|
||||
|
||||
-- Primary indexes for common filter patterns used in generate_graph()
|
||||
-- UPID: Used for patient grouping, pathway analysis
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_upid ON fact_interventions(upid);
|
||||
|
||||
-- Drug name (standardized): Used for drug filtering
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_drug ON fact_interventions(drug_name_std);
|
||||
|
||||
-- Intervention date: Used for date range filtering (start_date, end_date, last_seen)
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_date ON fact_interventions(intervention_date);
|
||||
|
||||
-- Directory: Used for directory/specialty filtering
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_directory ON fact_interventions(directory);
|
||||
|
||||
-- Organization: Used for trust filtering (Provider Code maps to org_name)
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_org ON fact_interventions(org_name);
|
||||
|
||||
-- Composite index for common filter combination (trust + drug + directory)
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_composite
|
||||
ON fact_interventions(org_name, drug_name_std, directory);
|
||||
|
||||
-- Composite index for date-based patient analysis
|
||||
CREATE INDEX IF NOT EXISTS idx_fact_interventions_upid_date
|
||||
ON fact_interventions(upid, intervention_date);
|
||||
"""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Materialized View Schemas (Cached Aggregations)
|
||||
# =============================================================================
|
||||
|
||||
MV_PATIENT_TREATMENT_SUMMARY_SCHEMA = """
|
||||
-- Materialized view of patient treatment summaries
|
||||
-- Pre-computed aggregations per patient for faster pathway analysis
|
||||
-- Refreshed when fact_interventions data changes
|
||||
CREATE TABLE IF NOT EXISTS mv_patient_treatment_summary (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
||||
-- Patient identification
|
||||
upid TEXT NOT NULL UNIQUE, -- Unique Patient ID
|
||||
|
||||
-- Organization and directory (for filtering)
|
||||
org_name TEXT, -- Organization name (first org seen)
|
||||
directory TEXT, -- Primary directory (first directory assigned)
|
||||
|
||||
-- Date range
|
||||
first_seen_date DATE NOT NULL, -- First intervention date
|
||||
last_seen_date DATE NOT NULL, -- Last intervention date
|
||||
days_treated INTEGER NOT NULL DEFAULT 0, -- Duration: last_seen - first_seen
|
||||
|
||||
-- Cost aggregations
|
||||
total_cost REAL NOT NULL DEFAULT 0, -- Sum of all intervention costs
|
||||
avg_cost_per_intervention REAL, -- Average cost per intervention
|
||||
|
||||
-- Treatment summary
|
||||
intervention_count INTEGER NOT NULL DEFAULT 0, -- Total number of interventions
|
||||
unique_drug_count INTEGER NOT NULL DEFAULT 0, -- Number of distinct drugs
|
||||
|
||||
-- Drug sequence (pipe-separated standardized drug names in chronological order)
|
||||
-- Example: "ADALIMUMAB|ETANERCEPT|INFLIXIMAB"
|
||||
drug_sequence TEXT,
|
||||
|
||||
-- Drug frequency counts (JSON: {"ADALIMUMAB": 5, "ETANERCEPT": 3})
|
||||
-- Stores count of each drug for this patient
|
||||
drug_counts_json TEXT,
|
||||
|
||||
-- Drug cost totals (JSON: {"ADALIMUMAB": 15000.00, "ETANERCEPT": 8000.00})
|
||||
-- Stores total cost per drug for this patient
|
||||
drug_costs_json TEXT,
|
||||
|
||||
-- Per-drug date ranges (JSON: {"ADALIMUMAB": {"first": "2023-01-01", "last": "2023-06-15"}, ...})
|
||||
-- Stores first/last date for each drug
|
||||
drug_date_ranges_json TEXT,
|
||||
|
||||
-- Metadata
|
||||
computed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
source_row_count INTEGER -- Number of fact_interventions rows used
|
||||
);
|
||||
|
||||
-- Index for fast patient lookup
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_upid ON mv_patient_treatment_summary(upid);
|
||||
|
||||
-- Indexes for common filter patterns
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_org ON mv_patient_treatment_summary(org_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_directory ON mv_patient_treatment_summary(directory);
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_first_seen ON mv_patient_treatment_summary(first_seen_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_last_seen ON mv_patient_treatment_summary(last_seen_date);
|
||||
|
||||
-- Composite index for date range filtering (common in generate_graph)
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_date_range
|
||||
ON mv_patient_treatment_summary(first_seen_date, last_seen_date);
|
||||
|
||||
-- Composite index for org + directory + dates (full filter pattern)
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_filter_composite
|
||||
ON mv_patient_treatment_summary(org_name, directory, first_seen_date, last_seen_date);
|
||||
|
||||
-- Index for drug sequence pattern matching
|
||||
CREATE INDEX IF NOT EXISTS idx_mv_patient_summary_drug_seq ON mv_patient_treatment_summary(drug_sequence);
|
||||
"""
|
||||
|
||||
MATERIALIZED_VIEWS_SCHEMA = f"""
|
||||
-- Materialized Views Schema
|
||||
-- Pre-computed aggregations for performance
|
||||
|
||||
{MV_PATIENT_TREATMENT_SUMMARY_SCHEMA}
|
||||
"""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# File Tracking Schemas (Incremental Updates)
|
||||
# =============================================================================
|
||||
|
||||
PROCESSED_FILES_SCHEMA = """
|
||||
-- Tracks processed data files for incremental updates
|
||||
-- Enables detecting changed files by comparing hashes
|
||||
-- Stores processing status and statistics
|
||||
CREATE TABLE IF NOT EXISTS processed_files (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
||||
-- File identification
|
||||
file_path TEXT NOT NULL, -- Full path to the file
|
||||
file_name TEXT NOT NULL, -- Just the filename (for display)
|
||||
file_hash TEXT NOT NULL, -- SHA256 hash of file contents
|
||||
|
||||
-- File metadata
|
||||
file_size_bytes INTEGER, -- Size of file in bytes
|
||||
file_modified_at TIMESTAMP, -- File's last modification timestamp
|
||||
|
||||
-- Processing results
|
||||
row_count INTEGER DEFAULT 0, -- Number of rows processed from this file
|
||||
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, success, error
|
||||
error_message TEXT, -- Error details if status='error'
|
||||
|
||||
-- Timestamps
|
||||
first_processed_at TIMESTAMP, -- When first processed
|
||||
last_processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
processing_duration_seconds REAL, -- How long processing took
|
||||
|
||||
-- Uniqueness: only one record per file path
|
||||
-- Hash changes indicate file content changed (needs reprocessing)
|
||||
UNIQUE(file_path)
|
||||
);
|
||||
|
||||
-- Index for fast lookup by file path
|
||||
CREATE INDEX IF NOT EXISTS idx_processed_files_path ON processed_files(file_path);
|
||||
|
||||
-- Index for finding files by status (e.g., find all pending or errored files)
|
||||
CREATE INDEX IF NOT EXISTS idx_processed_files_status ON processed_files(status);
|
||||
|
||||
-- Index for finding files by hash (detect if same file appears at different paths)
|
||||
CREATE INDEX IF NOT EXISTS idx_processed_files_hash ON processed_files(file_hash);
|
||||
|
||||
-- Index for finding recently processed files
|
||||
CREATE INDEX IF NOT EXISTS idx_processed_files_last_processed ON processed_files(last_processed_at);
|
||||
"""
|
||||
|
||||
FILE_TRACKING_SCHEMA = f"""
|
||||
-- File Tracking Schema
|
||||
-- Supports incremental data loading
|
||||
|
||||
{PROCESSED_FILES_SCHEMA}
|
||||
"""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Combined Schemas
|
||||
# =============================================================================
|
||||
@@ -520,29 +282,14 @@ REFERENCE_TABLES_SCHEMA = f"""
|
||||
{REF_DRUG_DIRECTORY_MAP_SCHEMA}
|
||||
|
||||
{REF_DRUG_INDICATION_CLUSTERS_SCHEMA}
|
||||
|
||||
{REF_DRUG_SNOMED_MAPPING_SCHEMA}
|
||||
"""
|
||||
|
||||
FACT_TABLES_SCHEMA = f"""
|
||||
-- Fact Tables Schema
|
||||
-- Contains patient intervention data
|
||||
|
||||
{FACT_INTERVENTIONS_SCHEMA}
|
||||
"""
|
||||
|
||||
ALL_TABLES_SCHEMA = f"""
|
||||
-- Complete Database Schema
|
||||
-- Reference tables + Fact tables + Materialized views + File tracking + Pathway tables
|
||||
-- Reference tables + Pathway tables
|
||||
|
||||
{REFERENCE_TABLES_SCHEMA}
|
||||
|
||||
{FACT_TABLES_SCHEMA}
|
||||
|
||||
{MATERIALIZED_VIEWS_SCHEMA}
|
||||
|
||||
{FILE_TRACKING_SCHEMA}
|
||||
|
||||
{PATHWAY_TABLES_SCHEMA}
|
||||
"""
|
||||
|
||||
@@ -580,26 +327,10 @@ def drop_reference_tables(conn: sqlite3.Connection) -> None:
|
||||
DROP TABLE IF EXISTS ref_directories;
|
||||
DROP TABLE IF EXISTS ref_drug_directory_map;
|
||||
DROP TABLE IF EXISTS ref_drug_indication_clusters;
|
||||
DROP TABLE IF EXISTS ref_drug_snomed_mapping;
|
||||
""")
|
||||
logger.info("Reference tables dropped")
|
||||
|
||||
|
||||
def create_drug_snomed_mapping_table(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Create the ref_drug_snomed_mapping table for direct SNOMED code mapping.
|
||||
|
||||
This table stores mappings from drugs to SNOMED codes for GP record matching,
|
||||
enabling diagnosis-based directorate assignment and indication-based pathways.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
"""
|
||||
logger.info("Creating ref_drug_snomed_mapping table...")
|
||||
conn.executescript(REF_DRUG_SNOMED_MAPPING_SCHEMA)
|
||||
logger.info("ref_drug_snomed_mapping table created successfully")
|
||||
|
||||
|
||||
def get_reference_table_counts(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"""
|
||||
Get row counts for all reference tables.
|
||||
@@ -616,7 +347,6 @@ def get_reference_table_counts(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"ref_directories",
|
||||
"ref_drug_directory_map",
|
||||
"ref_drug_indication_clusters",
|
||||
"ref_drug_snomed_mapping",
|
||||
]
|
||||
counts = {}
|
||||
|
||||
@@ -647,7 +377,6 @@ def verify_reference_tables_exist(conn: sqlite3.Connection) -> list[str]:
|
||||
"ref_directories",
|
||||
"ref_drug_directory_map",
|
||||
"ref_drug_indication_clusters",
|
||||
"ref_drug_snomed_mapping",
|
||||
]
|
||||
missing = []
|
||||
|
||||
@@ -662,164 +391,6 @@ def verify_reference_tables_exist(conn: sqlite3.Connection) -> list[str]:
|
||||
return missing
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fact Table Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
def create_fact_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Create all fact tables in the database (including materialized views).
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
"""
|
||||
logger.info("Creating fact tables...")
|
||||
conn.executescript(FACT_TABLES_SCHEMA)
|
||||
conn.executescript(MATERIALIZED_VIEWS_SCHEMA)
|
||||
logger.info("Fact tables created successfully")
|
||||
|
||||
|
||||
def drop_fact_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Drop all fact tables from the database.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Warning:
|
||||
This will delete all patient intervention data. Use with caution.
|
||||
"""
|
||||
logger.warning("Dropping fact tables...")
|
||||
conn.executescript("""
|
||||
DROP TABLE IF EXISTS fact_interventions;
|
||||
DROP TABLE IF EXISTS mv_patient_treatment_summary;
|
||||
""")
|
||||
logger.info("Fact tables dropped")
|
||||
|
||||
|
||||
def get_fact_table_counts(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"""
|
||||
Get row counts for all fact tables (including materialized views).
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping table name to row count.
|
||||
"""
|
||||
tables = ["fact_interventions", "mv_patient_treatment_summary"]
|
||||
counts = {}
|
||||
|
||||
for table in tables:
|
||||
cursor = conn.execute(f"SELECT COUNT(*) FROM {table}")
|
||||
result = cursor.fetchone()
|
||||
counts[table] = result[0] if result else 0
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
def verify_fact_tables_exist(conn: sqlite3.Connection) -> list[str]:
|
||||
"""
|
||||
Verify that all fact tables exist (including materialized views).
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Returns:
|
||||
List of missing table names. Empty list means all tables exist.
|
||||
"""
|
||||
required_tables = ["fact_interventions", "mv_patient_treatment_summary"]
|
||||
missing = []
|
||||
|
||||
for table in required_tables:
|
||||
cursor = conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
|
||||
(table,)
|
||||
)
|
||||
if cursor.fetchone() is None:
|
||||
missing.append(table)
|
||||
|
||||
return missing
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# File Tracking Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
def create_file_tracking_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Create file tracking tables in the database.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
"""
|
||||
logger.info("Creating file tracking tables...")
|
||||
conn.executescript(FILE_TRACKING_SCHEMA)
|
||||
logger.info("File tracking tables created successfully")
|
||||
|
||||
|
||||
def drop_file_tracking_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Drop file tracking tables from the database.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Warning:
|
||||
This will delete all file tracking history.
|
||||
"""
|
||||
logger.warning("Dropping file tracking tables...")
|
||||
conn.executescript("""
|
||||
DROP TABLE IF EXISTS processed_files;
|
||||
""")
|
||||
logger.info("File tracking tables dropped")
|
||||
|
||||
|
||||
def get_file_tracking_counts(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"""
|
||||
Get row counts for file tracking tables.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping table name to row count.
|
||||
"""
|
||||
tables = ["processed_files"]
|
||||
counts = {}
|
||||
|
||||
for table in tables:
|
||||
cursor = conn.execute(f"SELECT COUNT(*) FROM {table}")
|
||||
result = cursor.fetchone()
|
||||
counts[table] = result[0] if result else 0
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
def verify_file_tracking_tables_exist(conn: sqlite3.Connection) -> list[str]:
|
||||
"""
|
||||
Verify that file tracking tables exist.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
|
||||
Returns:
|
||||
List of missing table names. Empty list means all tables exist.
|
||||
"""
|
||||
required_tables = ["processed_files"]
|
||||
missing = []
|
||||
|
||||
for table in required_tables:
|
||||
cursor = conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
|
||||
(table,)
|
||||
)
|
||||
if cursor.fetchone() is None:
|
||||
missing.append(table)
|
||||
|
||||
return missing
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pathway Table Helper Functions
|
||||
# =============================================================================
|
||||
@@ -1050,13 +621,37 @@ def migrate_pathway_nodes_chart_type(conn: sqlite3.Connection) -> tuple[bool, st
|
||||
return False, f"Migration failed: {e}"
|
||||
|
||||
|
||||
def migrate_refresh_log_source_row_count(conn: sqlite3.Connection) -> tuple[bool, str]:
|
||||
"""Add source_row_count column to pathway_refresh_log if it doesn't exist.
|
||||
|
||||
This column stores the Snowflake row count for display in the UI footer.
|
||||
"""
|
||||
cursor = conn.execute("PRAGMA table_info(pathway_refresh_log)")
|
||||
columns = [row[1] for row in cursor.fetchall()]
|
||||
|
||||
if "source_row_count" in columns:
|
||||
return True, "source_row_count column already exists"
|
||||
|
||||
logger.info("Adding source_row_count column to pathway_refresh_log...")
|
||||
try:
|
||||
conn.execute("""
|
||||
ALTER TABLE pathway_refresh_log
|
||||
ADD COLUMN source_row_count INTEGER
|
||||
""")
|
||||
conn.commit()
|
||||
return True, "Added source_row_count column"
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to add source_row_count column: {e}")
|
||||
return False, f"Migration failed: {e}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Combined Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
def create_all_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
Create all tables (reference + fact) in the database.
|
||||
Create all tables (reference + pathway) in the database.
|
||||
|
||||
Args:
|
||||
conn: SQLite database connection.
|
||||
@@ -1078,8 +673,6 @@ def drop_all_tables(conn: sqlite3.Connection) -> None:
|
||||
"""
|
||||
logger.warning("Dropping all tables...")
|
||||
drop_pathway_tables(conn)
|
||||
drop_file_tracking_tables(conn)
|
||||
drop_fact_tables(conn)
|
||||
drop_reference_tables(conn)
|
||||
logger.info("All tables dropped")
|
||||
|
||||
@@ -1096,8 +689,6 @@ def get_all_table_counts(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"""
|
||||
counts = {}
|
||||
counts.update(get_reference_table_counts(conn))
|
||||
counts.update(get_fact_table_counts(conn))
|
||||
counts.update(get_file_tracking_counts(conn))
|
||||
counts.update(get_pathway_table_counts(conn))
|
||||
return counts
|
||||
|
||||
@@ -1114,7 +705,5 @@ def verify_all_tables_exist(conn: sqlite3.Connection) -> list[str]:
|
||||
"""
|
||||
missing = []
|
||||
missing.extend(verify_reference_tables_exist(conn))
|
||||
missing.extend(verify_fact_tables_exist(conn))
|
||||
missing.extend(verify_file_tracking_tables_exist(conn))
|
||||
missing.extend(verify_pathway_tables_exist(conn))
|
||||
return missing
|
||||
|
||||
Reference in New Issue
Block a user