feat: US-005 - Implement cosine similarity search module
This commit is contained in:
+1
-1
@@ -89,7 +89,7 @@
|
|||||||
"Typecheck passes"
|
"Typecheck passes"
|
||||||
],
|
],
|
||||||
"priority": 5,
|
"priority": 5,
|
||||||
"passes": false,
|
"passes": true,
|
||||||
"notes": "Keep the cosine similarity implementation simple — no libraries needed for 384-d vectors over ~40 items. The loadEmbeddings function can use a dynamic import or direct import of the JSON file (Vite handles JSON imports natively)."
|
"notes": "Keep the cosine similarity implementation simple — no libraries needed for 384-d vectors over ~40 items. The loadEmbeddings function can use a dynamic import or direct import of the JSON file (Vite handles JSON imports natively)."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
- `src/data/embeddings.json` is an array of `{ id: string, embedding: number[] }` — 42 items, 384-d vectors, IDs match PaletteItem IDs. Vite imports JSON natively.
|
- `src/data/embeddings.json` is an array of `{ id: string, embedding: number[] }` — 42 items, 384-d vectors, IDs match PaletteItem IDs. Vite imports JSON natively.
|
||||||
- `src/lib/embedding-model.ts` exports `initModel()`, `embedQuery(text)`, `isModelReady()` — check `isModelReady()` before calling `embedQuery()`
|
- `src/lib/embedding-model.ts` exports `initModel()`, `embedQuery(text)`, `isModelReady()` — check `isModelReady()` before calling `embedQuery()`
|
||||||
- `initModel()` is called fire-and-forget in `App.tsx` on mount — model loads during boot/ECG/login phases
|
- `initModel()` is called fire-and-forget in `App.tsx` on mount — model loads during boot/ECG/login phases
|
||||||
|
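- `src/lib/semantic-search.ts` exports `semanticSearch(queryEmbedding, embeddings, threshold?)` and `loadEmbeddings()` — embeddings are L2-normalized, so cosine similarity reduces to the dot product, but the implementation keeps the full dot(a,b)/(mag(a)*mag(b)) formula so it stays correct for non-normalized vectors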
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -80,3 +81,15 @@
|
|||||||
- `initModel()` is intentionally not awaited — it's fire-and-forget so it doesn't block the boot animation
|
- `initModel()` is intentionally not awaited — it's fire-and-forget so it doesn't block the boot animation
|
||||||
- Consumers should check `isModelReady()` before calling `embedQuery()` — it throws if model isn't loaded
|
- Consumers should check `isModelReady()` before calling `embedQuery()` — it throws if model isn't loaded
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 2026-02-15 - US-005
|
||||||
|
- Created `src/lib/semantic-search.ts` with cosine similarity search and embeddings loader
|
||||||
|
- `semanticSearch()` computes cosine similarity, filters by threshold (default 0.3), returns sorted by score descending
|
||||||
|
- `loadEmbeddings()` imports `embeddings.json` via Vite's native JSON import and returns typed array
|
||||||
|
- Typecheck and lint pass (0 new warnings)
|
||||||
|
- Files changed: `src/lib/semantic-search.ts` (new)
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- Vite handles JSON imports natively — `import data from '@/data/embeddings.json'` just works, no dynamic import needed
|
||||||
|
- Since embeddings are already L2-normalized (from pipeline's `normalize: true`), cosine similarity simplifies to just the dot product. However, the full formula is kept for correctness in case non-normalized vectors are ever used
|
||||||
|
- With only ~42 items and 384-d vectors, brute-force cosine similarity is fast enough — no need for approximate nearest neighbor libraries
|
||||||
|
---
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
import embeddingsData from '@/data/embeddings.json'
|
||||||
|
|
||||||
|
/**
 * One record of the embeddings dataset: an item identifier paired with
 * its embedding vector.
 */
interface EmbeddingEntry {
  /** Identifier of the item this embedding belongs to. */
  id: string
  /** Embedding vector for the item. */
  embedding: number[]
}
|
||||||
|
|
||||||
|
/**
 * A single search hit: the identifier of the matched item and the
 * similarity score it received.
 */
interface SearchResult {
  /** Identifier of the matched item. */
  id: string
  /** Similarity score assigned to the item; higher means more similar. */
  score: number
}
|
||||||
|
|
||||||
|
/**
 * Computes the cosine similarity of two numeric vectors:
 * `dot(a, b) / (|a| * |b|)`.
 *
 * The full formula is used (rather than a bare dot product) so the result
 * is correct even when the inputs are not unit-length.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a` to be comparable.
 * @returns The cosine similarity, or `0` when the vectors have different
 *   lengths or when either vector has zero magnitude (including empty input).
 */
function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  // Vectors of different dimensionality are not comparable. Reading past the
  // end of the shorter one would yield NaN (or silently ignore trailing
  // components), so treat the pair as "no similarity" instead.
  if (a.length !== b.length) return 0

  let dot = 0
  let magA = 0
  let magB = 0
  for (let i = 0; i < a.length; i++) {
    // `?? 0` keeps the arithmetic NaN-free for sparse arrays and satisfies
    // `noUncheckedIndexedAccess`.
    const x = a[i] ?? 0
    const y = b[i] ?? 0
    dot += x * y
    magA += x * x
    magB += y * y
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB)
  // A zero-magnitude vector has no direction; avoid dividing by zero.
  return denom === 0 ? 0 : dot / denom
}
|
||||||
|
|
||||||
|
/**
 * Ranks embedding entries by cosine similarity to a query embedding.
 *
 * Every entry is scored against `queryEmbedding`; entries whose score is
 * below `threshold` are dropped, and the remainder is returned sorted by
 * score, highest first. The `embeddings` array itself is not modified.
 *
 * @param queryEmbedding - Embedding vector of the search query.
 * @param embeddings - Candidate entries to score.
 * @param threshold - Minimum score (inclusive) an entry needs to be returned.
 *   Defaults to `0.3`.
 * @returns Matching entries as `{ id, score }`, sorted by descending score.
 */
export function semanticSearch(
  queryEmbedding: number[],
  embeddings: EmbeddingEntry[],
  threshold = 0.3
): SearchResult[] {
  return embeddings
    .map(entry => ({
      id: entry.id,
      score: cosineSimilarity(queryEmbedding, entry.embedding),
    }))
    // A NaN score never satisfies `>=`, so it is excluded here as well.
    .filter(r => r.score >= threshold)
    .sort((a, b) => b.score - a.score)
}
|
||||||
|
|
||||||
|
/**
 * Returns the embeddings imported from `@/data/embeddings.json`, typed as
 * `EmbeddingEntry[]`.
 *
 * The JSON is brought in through a static module import, so this call is
 * synchronous. The data is only type-asserted, not validated at runtime.
 *
 * @returns The imported embeddings array.
 */
export function loadEmbeddings(): EmbeddingEntry[] {
  return embeddingsData as EmbeddingEntry[]
}
|
||||||
Reference in New Issue
Block a user