Initial commit before Ralph loop

This commit is contained in:
Andrew Charlwood
2026-02-04 13:04:29 +00:00
commit fdd33a67af
89 changed files with 20660 additions and 0 deletions
+17
View File
@@ -0,0 +1,17 @@
"""
Core module for NHS High-Cost Drug Patient Pathway Analysis Tool.
Contains configuration, models, and shared utilities used across the application.
"""
from core.config import PathConfig, default_paths
from core.models import AnalysisFilters
from core.logging_config import setup_logging, get_logger
# Public API of the core package: every name listed here is imported above
# from core.config, core.models or core.logging_config and re-exported.
__all__ = [
"PathConfig",
"default_paths",
"AnalysisFilters",
"setup_logging",
"get_logger",
]
+197
View File
@@ -0,0 +1,197 @@
"""
Configuration module for NHS High-Cost Drug Patient Pathway Analysis Tool.
Contains PathConfig dataclass for centralizing all file path references.
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@dataclass
class PathConfig:
    """
    Centralizes all file paths used across the application.

    Provides a single source of truth for file locations, making it easier to:
    - Change the data directory location
    - Support different environments (development, production)
    - Validate that required files exist

    Attributes:
        base_dir: Root directory of the application (defaults to the current
            working directory at construction time).
        _data_dir: Optional override for the reference-data directory;
            defaults to ``base_dir / "data"``. Read it via ``data_dir``.
        _images_dir: Optional override for the UI assets directory;
            defaults to ``base_dir / "images"``. Read it via ``images_dir``.
    """

    base_dir: Path = field(default_factory=Path.cwd)
    _data_dir: Optional[Path] = field(default=None, repr=False)
    _images_dir: Optional[Path] = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Set default subdirectories relative to base_dir if not provided."""
        if self._data_dir is None:
            self._data_dir = self.base_dir / "data"
        if self._images_dir is None:
            self._images_dir = self.base_dir / "images"

    @property
    def data_dir(self) -> Path:
        """Directory containing reference data files."""
        # _data_dir is always set after __post_init__; the assert narrows
        # Optional[Path] to Path for type checkers.
        assert self._data_dir is not None
        return self._data_dir

    @property
    def images_dir(self) -> Path:
        """Directory containing UI assets and fonts."""
        # _images_dir is always set after __post_init__; the assert narrows
        # Optional[Path] to Path for type checkers.
        assert self._images_dir is not None
        return self._images_dir

    # Reference data files (read-only lookups)

    @property
    def drugnames_csv(self) -> Path:
        """Drug name standardization mapping."""
        return self.data_dir / "drugnames.csv"

    @property
    def directory_list_csv(self) -> Path:
        """Medical specialties/directories list."""
        return self.data_dir / "directory_list.csv"

    @property
    def treatment_function_codes_csv(self) -> Path:
        """NHS treatment function code mappings."""
        return self.data_dir / "treatment_function_codes.csv"

    @property
    def drug_directory_list_csv(self) -> Path:
        """Valid drug-to-directory mappings (pipe-separated)."""
        return self.data_dir / "drug_directory_list.csv"

    @property
    def org_codes_csv(self) -> Path:
        """Provider code to organization name mapping."""
        return self.data_dir / "org_codes.csv"

    @property
    def include_csv(self) -> Path:
        """Drug filter list with default selections."""
        return self.data_dir / "include.csv"

    @property
    def default_trusts_csv(self) -> Path:
        """NHS Trust list for filter."""
        return self.data_dir / "defaultTrusts.csv"

    # Output/diagnostic files

    @property
    def na_directory_rows_csv(self) -> Path:
        """Exported rows with unresolved Directory for diagnostics."""
        return self.data_dir / "na_directory_rows.csv"

    @property
    def ta_recommendations_xlsx(self) -> Path:
        """NICE TA recommendations (downloaded from web)."""
        return self.data_dir / "ta-recommendations.xlsx"

    # UI assets

    @property
    def font_medium(self) -> Path:
        """AvenirLTStd-Medium font file."""
        return self.images_dir / "AvenirLTStd-Medium.ttf"

    @property
    def font_roman(self) -> Path:
        """AvenirLTStd-Roman font file."""
        return self.images_dir / "AvenirLTStd-Roman.ttf"

    @property
    def logo_ico(self) -> Path:
        """Application icon."""
        return self.images_dir / "logo.ico"

    @property
    def logo_png(self) -> Path:
        """Application logo."""
        return self.images_dir / "logo.png"

    @staticmethod
    def _missing_files(files: list[tuple[Path, str]]) -> list[str]:
        """Return one error message per (path, description) that is not a regular file."""
        # is_file() rather than exists(): a directory sitting at the expected
        # file path is just as unusable as a missing file.
        return [
            f"{description} not found: {file_path}"
            for file_path, description in files
            if not file_path.is_file()
        ]

    def validate(self) -> list[str]:
        """
        Validate that required files and directories exist.

        Directories must be actual directories and reference files must be
        regular files; a path of the wrong kind is reported as not found.

        Returns:
            List of error messages. Empty list means all validations passed.
        """
        errors: list[str] = []

        # Check directories exist
        if not self.data_dir.is_dir():
            errors.append(f"Data directory not found: {self.data_dir}")
        if not self.images_dir.is_dir():
            errors.append(f"Images directory not found: {self.images_dir}")

        # Check required reference files
        required_files = [
            (self.drugnames_csv, "Drug names mapping"),
            (self.directory_list_csv, "Directory list"),
            (self.treatment_function_codes_csv, "Treatment function codes"),
            (self.drug_directory_list_csv, "Drug-directory mapping"),
            (self.org_codes_csv, "Organization codes"),
            (self.include_csv, "Drug include list"),
            (self.default_trusts_csv, "Default trusts"),
        ]
        errors.extend(self._missing_files(required_files))
        return errors

    def validate_fonts(self) -> list[str]:
        """
        Validate that font files exist (for GUI mode).

        Returns:
            List of error messages. Empty list means all validations passed.
        """
        font_files = [
            (self.font_medium, "Medium font"),
            (self.font_roman, "Roman font"),
        ]
        return self._missing_files(font_files)

    def _legacy_path(self, path: Path) -> str:
        """Format *path* as a './'-prefixed string relative to base_dir when possible."""
        try:
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # The path lives outside base_dir (e.g. an overridden _data_dir),
            # so no './' form exists; hand back the path unchanged instead of
            # crashing.
            return str(path)
        # as_posix() keeps forward slashes on every platform, matching the
        # './data/file.csv' literals this format stands in for.
        return f"./{relative.as_posix()}"

    def as_legacy_paths(self) -> dict[str, str]:
        """
        Return paths as strings with './' prefix for backwards compatibility.

        This method eases migration by providing paths in the format
        currently used throughout the codebase. Paths that are not located
        under base_dir cannot be expressed in that format and are returned
        as their plain string form.

        Returns:
            Dictionary mapping path names to legacy-format string paths.
        """
        names = (
            "drugnames_csv",
            "directory_list_csv",
            "treatment_function_codes_csv",
            "drug_directory_list_csv",
            "org_codes_csv",
            "include_csv",
            "default_trusts_csv",
            "na_directory_rows_csv",
            "ta_recommendations_xlsx",
        )
        return {name: self._legacy_path(getattr(self, name)) for name in names}
# Default instance for application-wide use.
# Constructed with no arguments, so base_dir is taken from Path.cwd() at the
# moment this module is imported, and the data/images directories default to
# the "data" and "images" subdirectories of it.
default_paths = PathConfig()
+121
View File
@@ -0,0 +1,121 @@
"""
Logging configuration for NHS High-Cost Drug Patient Pathway Analysis Tool.
Provides structured logging setup with console and optional file handlers.
"""
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional
# Default log format: timestamp, level, logger name, message
DEFAULT_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
DEFAULT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Simplified format for console output (used when redirecting to GUI)
SIMPLE_FORMAT = "%(message)s"


def setup_logging(
    level: int = logging.INFO,
    log_dir: Optional[Path] = None,
    console: bool = True,
    file_logging: bool = False,
    simple_console: bool = False,
) -> logging.Logger:
    """
    Configure application-wide logging.

    Any handlers already attached to the "pathways" logger are removed and
    closed first, so calling this function repeatedly neither duplicates
    output nor leaves earlier log files open.

    Args:
        level: Logging level (default: INFO)
        log_dir: Directory for log files (default: ./logs/). Only used when
            file_logging is True; created if it does not exist.
        console: Whether to log to console/stdout (default: True)
        file_logging: Whether to log to file (default: False)
        simple_console: Use simplified format for console (just message, no timestamp)

    Returns:
        Root logger configured for the application

    Usage:
        # Basic setup - console only
        logger = setup_logging()

        # With file logging
        logger = setup_logging(file_logging=True)

        # Debug mode
        logger = setup_logging(level=logging.DEBUG)

        # GUI mode - simple format for stdout capture
        logger = setup_logging(simple_console=True)
    """
    # Get root logger for the application
    root_logger = logging.getLogger("pathways")

    # Remove AND close existing handlers. Merely clearing the list would keep
    # any previous FileHandler's file open for the life of the process.
    # (Closing a StreamHandler does not close the underlying stream.)
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    root_logger.setLevel(level)

    # Console handler
    if console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(level)
        if simple_console:
            console_format = logging.Formatter(SIMPLE_FORMAT)
        else:
            console_format = logging.Formatter(
                DEFAULT_FORMAT, datefmt=DEFAULT_DATE_FORMAT
            )
        console_handler.setFormatter(console_format)
        root_logger.addHandler(console_handler)

    # File handler
    if file_logging:
        # Path() also accepts a plain string, so callers passing "logs" work.
        log_dir = Path("./logs") if log_dir is None else Path(log_dir)
        log_dir.mkdir(parents=True, exist_ok=True)

        # One log file per run, named by start time (second resolution).
        log_filename = f"pathways_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
        log_path = log_dir / log_filename

        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(
            logging.Formatter(DEFAULT_FORMAT, datefmt=DEFAULT_DATE_FORMAT)
        )
        root_logger.addHandler(file_handler)

    return root_logger
def get_logger(name: str) -> logging.Logger:
    """
    Get a logger for a specific module.

    Names that are already inside the "pathways" namespace (the root name
    itself, or anything starting with "pathways.") are used as-is; any other
    name is prefixed with "pathways.".

    Args:
        name: Module name (typically __name__)

    Returns:
        Logger instance in the pathways namespace: the root pathways logger
        when name is "pathways", otherwise a child of it.

    Usage:
        from core.logging_config import get_logger

        logger = get_logger(__name__)
        logger.info("Processing started")
        logger.error("Something went wrong")
    """
    root_name = "pathways"
    # The exact root name must pass through untouched; prefixing it would
    # produce a stray "pathways.pathways" logger.
    if name == root_name or name.startswith(f"{root_name}."):
        return logging.getLogger(name)
    # Create child logger under the pathways namespace
    return logging.getLogger(f"{root_name}.{name}")
# Module-level loggers for common components.
# get_logger() prefixes each name, so these are "pathways.data",
# "pathways.dashboard" and "pathways.gui" - children of the "pathways" logger.
data_logger = get_logger("data")
dashboard_logger = get_logger("dashboard")
gui_logger = get_logger("gui")
+140
View File
@@ -0,0 +1,140 @@
"""
Data models for NHS High-Cost Drug Patient Pathway Analysis Tool.
Contains dataclasses for encapsulating application state and filter parameters.
"""
from dataclasses import dataclass, field
from datetime import date
from pathlib import Path
from typing import Optional
@dataclass
class AnalysisFilters:
    """
    Encapsulates all filter state for the analysis pipeline.

    Replaces the individual parameters currently passed to generate_graph()
    and the global state managed in the GUI. This provides:
    - Type safety for filter values
    - Validation of filter combinations
    - Easy serialization for caching/persistence
    - Clear interface between GUI and analysis engine

    Attributes:
        start_date: Patient initiated start date (treatment pathway start)
        end_date: Patient initiated end date (treatment pathway start cutoff)
        last_seen_date: Minimum last seen date (filters out patients not seen recently)
        trusts: List of NHS Trust names to include (empty = all)
        drugs: List of drug names to include (empty = all)
        directories: List of medical directories/specialties to include (empty = all)
        custom_title: Optional custom title for the graph (blank = auto-generated)
        minimum_patients: Minimum number of patients for a pathway to be included
        output_dir: Directory where output files should be saved
    """

    start_date: date
    end_date: date
    last_seen_date: date
    trusts: list[str] = field(default_factory=list)
    drugs: list[str] = field(default_factory=list)
    directories: list[str] = field(default_factory=list)
    custom_title: str = ""
    minimum_patients: int = 0
    output_dir: Optional[Path] = None

    def validate(self) -> list[str]:
        """
        Validate filter configuration for logical consistency.

        Checks the date ordering, that minimum_patients is non-negative and,
        when output_dir is given, that it is an existing directory.

        Returns:
            List of error messages. Empty list means all validations passed.
        """
        errors: list[str] = []

        # Date range validation
        if self.end_date < self.start_date:
            errors.append(
                f"End date ({self.end_date}) cannot be before start date ({self.start_date})"
            )
        if self.last_seen_date > self.end_date:
            errors.append(
                f"Last seen date ({self.last_seen_date}) is after end date ({self.end_date}), "
                "which would exclude all patients"
            )

        # Minimum patients validation
        if self.minimum_patients < 0:
            errors.append(
                f"Minimum patients ({self.minimum_patients}) cannot be negative"
            )

        # Output directory validation. is_dir() rather than exists(): a
        # regular file at that path cannot receive output files either.
        if self.output_dir is not None and not self.output_dir.is_dir():
            errors.append(f"Output directory does not exist: {self.output_dir}")

        # Empty trusts/drugs/directories lists are valid and mean
        # "include all", so they are deliberately not checked here.
        return errors

    @property
    def has_trust_filter(self) -> bool:
        """Check if any trust filter is applied."""
        return bool(self.trusts)

    @property
    def has_drug_filter(self) -> bool:
        """Check if any drug filter is applied."""
        return bool(self.drugs)

    @property
    def has_directory_filter(self) -> bool:
        """Check if any directory filter is applied."""
        return bool(self.directories)

    @property
    def title(self) -> str:
        """
        Return the display title for the graph.

        If custom_title is set, use it. Otherwise, generate a default title
        based on the date range.
        """
        if self.custom_title:
            return self.custom_title
        return f"Patients initiated from {self.start_date} to {self.end_date}"

    def summary(self) -> str:
        """
        Return a human-readable summary of the filter configuration.

        Useful for logging and display in the GUI. One line per setting;
        each list filter shows the number of selected entries, or "All"
        when the list is empty. The custom title line appears only when a
        custom title is set.
        """
        lines = [
            f"Date range: {self.start_date} to {self.end_date}",
            f"Last seen after: {self.last_seen_date}",
            f"Minimum patients: {self.minimum_patients}",
        ]
        list_filters = (
            ("Trusts", self.trusts),
            ("Drugs", self.drugs),
            ("Directories", self.directories),
        )
        for label, selected in list_filters:
            if selected:
                lines.append(f"{label}: {len(selected)} selected")
            else:
                lines.append(f"{label}: All")
        if self.custom_title:
            lines.append(f"Custom title: {self.custom_title}")
        return "\n".join(lines)