feat: add directorate card tree builder for drug browser drawer (Task 1.2)

2026-02-06 13:06:29 +00:00
parent b71748fa7d
commit 76549420a0
3 changed files with 127 additions and 3 deletions
@@ -0,0 +1,83 @@
+"""
+Directorate card tree builder for the drug browser drawer.
+
+Loads DimSearchTerm.csv and builds a nested structure:
+    {PrimaryDirectorate: {Search_Term: [drug_fragment, ...]}}
+
+Also provides get_all_drugs() for the flat "All Drugs" card.
+"""
+
+import csv
+from collections import defaultdict
+from pathlib import Path
+
+from data_processing.diagnosis_lookup import SEARCH_TERM_MERGE_MAP
+
+# "data" directory located two levels above the directory containing this
+# module (parents[0] of the resolved path is this module's own directory).
+DATA_DIR = Path(__file__).resolve().parents[2] / "data"
+# CSV file read by build_directorate_tree().
+DIM_SEARCH_TERM_PATH = DATA_DIR / "DimSearchTerm.csv"
+
+
+def build_directorate_tree() -> dict[str, dict[str, list[str]]]:
+    """
+    Build a nested dict from DimSearchTerm.csv grouped by directorate.
+
+    Returns:
+        {
+            "CARDIOLOGY": {
+                "acute coronary syndrome": ["ABCIXIMAB", "CLOPIDOGREL", ...],
+                "atrial fibrillation": ["APIXABAN", "DABIGATRAN", ...],
+                ...
+            },
+            "CLINICAL HAEMATOLOGY": { ... },
+            ...
+        }
+
+    Search_Term values are normalized via SEARCH_TERM_MERGE_MAP
+    (e.g. "allergic asthma" → "asthma"). Drug fragments within
+    merged terms are combined and deduplicated.
+    """
+    # directorate → search_term → set of drug fragments
+    tree: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
+
+    with open(DIM_SEARCH_TERM_PATH, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            search_term = (row.get("Search_Term") or "").strip().lower()
+            drug_names_raw = row.get("CleanedDrugName") or ""
+            directorate = (row.get("PrimaryDirectorate") or "").strip().upper()
+
+            if not search_term or not directorate:
+                continue
+
+            # Apply merge map (e.g. "allergic asthma" → "asthma")
+            search_term = SEARCH_TERM_MERGE_MAP.get(search_term, search_term)
+
+            fragments = [
+                frag.strip().upper()
+                for frag in drug_names_raw.split("|")
+                if frag.strip()
+            ]
+
+            tree[directorate][search_term].update(fragments)
+
+    # Convert sets → sorted lists and sort at every level
+    result: dict[str, dict[str, list[str]]] = {}
+    for directorate in sorted(tree):
+        result[directorate] = {
+            term: sorted(tree[directorate][term])
+            for term in sorted(tree[directorate])
+        }
+
+    return result
+
+
+def get_all_drugs() -> list[str]:
+    """
+    Return a sorted flat list of all unique drug labels from pathway_nodes level 3.
+
+    Delegates to load_initial_data() which already queries the database.
+    """
+    from dash_app.data.queries import load_initial_data
+
+    data = load_initial_data()
+    return data.get("available_drugs", [])