From c4480d7c99d19d57969a6f996554c76ac07d7804 Mon Sep 17 00:00:00 2001 From: Andy Charlwood Date: Sun, 15 Feb 2026 18:01:51 +0000 Subject: [PATCH] feat: US-005 - Implement cosine similarity search module --- Ralph/prd.json | 2 +- Ralph/progress.txt | 13 ++++++++++++ src/lib/semantic-search.ts | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 src/lib/semantic-search.ts diff --git a/Ralph/prd.json b/Ralph/prd.json index c7549ee..272ad49 100644 --- a/Ralph/prd.json +++ b/Ralph/prd.json @@ -89,7 +89,7 @@ "Typecheck passes" ], "priority": 5, - "passes": false, + "passes": true, "notes": "Keep the cosine similarity implementation simple — no libraries needed for 384-d vectors over ~40 items. The loadEmbeddings function can use a dynamic import or direct import of the JSON file (Vite handles JSON imports natively)." }, { diff --git a/Ralph/progress.txt b/Ralph/progress.txt index b2d9467..621f254 100644 --- a/Ralph/progress.txt +++ b/Ralph/progress.txt @@ -12,6 +12,7 @@ - `src/data/embeddings.json` is an array of `{ id: string, embedding: number[] }` — 42 items, 384-d vectors, IDs match PaletteItem IDs. Vite imports JSON natively. 
- `src/lib/embedding-model.ts` exports `initModel()`, `embedQuery(text)`, `isModelReady()` — check `isModelReady()` before calling `embedQuery()` - `initModel()` is called fire-and-forget in `App.tsx` on mount — model loads during boot/ECG/login phases +- `src/lib/semantic-search.ts` exports `semanticSearch(queryEmbedding, embeddings, threshold?)` and `loadEmbeddings()` — embeddings are L2-normalized, so cosine similarity reduces to dot(a,b); the implementation still computes the full dot(a,b)/(mag(a)*mag(b)) so non-normalized vectors are handled correctly --- @@ -80,3 +81,15 @@ - `initModel()` is intentionally not awaited — it's fire-and-forget so it doesn't block the boot animation - Consumers should check `isModelReady()` before calling `embedQuery()` — it throws if model isn't loaded --- + +## 2026-02-15 - US-005 +- Created `src/lib/semantic-search.ts` with cosine similarity search and embeddings loader +- `semanticSearch()` computes cosine similarity, filters by threshold (default 0.3), returns sorted by score descending +- `loadEmbeddings()` imports `embeddings.json` via Vite's native JSON import and returns typed array +- Typecheck and lint pass (0 new warnings) +- Files changed: `src/lib/semantic-search.ts` (new) +- **Learnings for future iterations:** + - Vite handles JSON imports natively — `import data from '@/data/embeddings.json'` just works, no dynamic import needed + - Since embeddings are already L2-normalized (from pipeline's `normalize: true`), cosine similarity simplifies to just the dot product. 
However, the full formula is kept for correctness in case non-normalized vectors are ever used + - With only ~42 items and 384-d vectors, brute-force cosine similarity is fast enough — no need for approximate nearest neighbor libraries +--- diff --git a/src/lib/semantic-search.ts b/src/lib/semantic-search.ts new file mode 100644 index 0000000..18187d8 --- /dev/null +++ b/src/lib/semantic-search.ts @@ -0,0 +1,42 @@ +import embeddingsData from '@/data/embeddings.json' + +interface EmbeddingEntry { + id: string + embedding: number[] +} + +interface SearchResult { + id: string + score: number +} + +function cosineSimilarity(a: number[], b: number[]): number { + let dot = 0 + let magA = 0 + let magB = 0 + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i] + magA += a[i] * a[i] + magB += b[i] * b[i] + } + const denom = Math.sqrt(magA) * Math.sqrt(magB) + return denom === 0 ? 0 : dot / denom +} + +export function semanticSearch( + queryEmbedding: number[], + embeddings: EmbeddingEntry[], + threshold = 0.3 +): SearchResult[] { + return embeddings + .map(entry => ({ + id: entry.id, + score: cosineSimilarity(queryEmbedding, entry.embedding), + })) + .filter(r => r.score >= threshold) + .sort((a, b) => b.score - a.score) +} + +export function loadEmbeddings(): EmbeddingEntry[] { + return embeddingsData as EmbeddingEntry[] +}