From c4480d7c99d19d57969a6f996554c76ac07d7804 Mon Sep 17 00:00:00 2001
From: Andy Charlwood <andrew.charlwood@gmail.com>
Date: Sun, 15 Feb 2026 18:01:51 +0000
Subject: [PATCH] feat: US-005 - Implement cosine similarity search module

---
 Ralph/prd.json             |  2 +-
 Ralph/progress.txt         | 13 ++++++++++++
 src/lib/semantic-search.ts | 42 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 src/lib/semantic-search.ts

diff --git a/Ralph/prd.json b/Ralph/prd.json
index c7549ee..272ad49 100644
--- a/Ralph/prd.json
+++ b/Ralph/prd.json
@@ -89,7 +89,7 @@
         "Typecheck passes"
       ],
       "priority": 5,
-      "passes": false,
+      "passes": true,
       "notes": "Keep the cosine similarity implementation simple — no libraries needed for 384-d vectors over ~40 items. The loadEmbeddings function can use a dynamic import or direct import of the JSON file (Vite handles JSON imports natively)."
     },
     {
diff --git a/Ralph/progress.txt b/Ralph/progress.txt
index b2d9467..621f254 100644
--- a/Ralph/progress.txt
+++ b/Ralph/progress.txt
@@ -12,6 +12,7 @@
 - `src/data/embeddings.json` is an array of `{ id: string, embedding: number[] }` — 42 items, 384-d vectors, IDs match PaletteItem IDs. Vite imports JSON natively.
 - `src/lib/embedding-model.ts` exports `initModel()`, `embedQuery(text)`, `isModelReady()` — check `isModelReady()` before calling `embedQuery()`
 - `initModel()` is called fire-and-forget in `App.tsx` on mount — model loads during boot/ECG/login phases
+- `src/lib/semantic-search.ts` exports `semanticSearch(queryEmbedding, embeddings, threshold?)` and `loadEmbeddings()` — cosine similarity is computed with the full formula dot(a,b)/(mag(a)*mag(b)); because the embeddings are L2-normalized, this equals the plain dot product
 
 ---
 
@@ -80,3 +81,15 @@
   - `initModel()` is intentionally not awaited — it's fire-and-forget so it doesn't block the boot animation
   - Consumers should check `isModelReady()` before calling `embedQuery()` — it throws if model isn't loaded
 ---
+
+## 2026-02-15 - US-005
+- Created `src/lib/semantic-search.ts` with cosine similarity search and embeddings loader
+- `semanticSearch()` computes cosine similarity, filters by threshold (default 0.3), returns sorted by score descending
+- `loadEmbeddings()` imports `embeddings.json` via Vite's native JSON import and returns typed array
+- Typecheck and lint pass (0 new warnings)
+- Files changed: `src/lib/semantic-search.ts` (new)
+- **Learnings for future iterations:**
+  - Vite handles JSON imports natively — `import data from '@/data/embeddings.json'` just works, no dynamic import needed
+  - Since embeddings are already L2-normalized (from pipeline's `normalize: true`), cosine similarity simplifies to just the dot product. However, the full formula is kept for correctness in case non-normalized vectors are ever used
+  - With only ~42 items and 384-d vectors, brute-force cosine similarity is fast enough — no need for approximate nearest neighbor libraries
+---
diff --git a/src/lib/semantic-search.ts b/src/lib/semantic-search.ts
new file mode 100644
index 0000000..18187d8
--- /dev/null
+++ b/src/lib/semantic-search.ts
@@ -0,0 +1,42 @@
+import embeddingsData from '@/data/embeddings.json'
+
+interface EmbeddingEntry { // One record of the imported embeddings data: an item id plus its vector
+  id: string // Identifier that semanticSearch copies into SearchResult.id
+  embedding: number[] // Vector compared against the query embedding
+}
+
+interface SearchResult { // One match returned by semanticSearch
+  id: string // Copied from the matching EmbeddingEntry.id
+  score: number // Cosine similarity between the query and that entry's embedding
+}
+
+/** Cosine similarity of a and b, walking a's indices (equal lengths expected); 0 if either magnitude is 0. */
+function cosineSimilarity(a: number[], b: number[]): number {
+  let inner = 0, sumSqA = 0, sumSqB = 0
+  for (const [i, x] of a.entries()) {
+    inner += x * b[i]
+    sumSqA += x * x
+    sumSqB += b[i] * b[i]
+  }
+  const norm = Math.sqrt(sumSqA) * Math.sqrt(sumSqB)
+  if (norm === 0) return 0 // avoid dividing by zero for all-zero (or empty) vectors
+  return inner / norm
+}
+
+/** Scores every entry against the query, keeps those with score >= threshold, and returns them best-first. */
+export function semanticSearch(
+  queryEmbedding: number[],
+  embeddings: EmbeddingEntry[],
+  threshold = 0.3
+): SearchResult[] {
+  const hits: SearchResult[] = []
+  embeddings.forEach(({ id, embedding }) => {
+    const score = cosineSimilarity(queryEmbedding, embedding)
+    if (score >= threshold) hits.push({ id, score }) // a NaN score fails this comparison and is dropped
+  })
+  return hits.sort((x, y) => y.score - x.score)
+}
+
+export function loadEmbeddings(): EmbeddingEntry[] { // Exposes the statically imported embeddings.json data
+  return embeddingsData as EmbeddingEntry[] // Type assertion only: no runtime shape validation, and no copy is made
+}