feat: US-003 - Generate and commit embeddings.json

This commit is contained in:
2026-02-15 17:55:53 +00:00
parent 219a3f04be
commit aa1774320a
4 changed files with 16374 additions and 7 deletions
+16 -6
View File
@@ -1,17 +1,27 @@
import { writeFileSync } from 'node:fs'
import { resolve } from 'node:path'
import { pipeline } from '@xenova/transformers'
import { buildEmbeddingTexts } from '@/lib/search'
/**
 * Builds an embedding for every item returned by `buildEmbeddingTexts()` and
 * writes the collected vectors to `src/data/embeddings.json`, resolved
 * relative to the parent of this script's directory.
 *
 * Each item's `text` is passed through the `Xenova/all-MiniLM-L6-v2`
 * feature-extraction pipeline with mean pooling and normalization enabled.
 * The output file is a pretty-printed JSON array of `{ id, embedding }`
 * records, one per item, in the order the items were returned.
 *
 * @returns A promise that resolves once the JSON file has been written.
 *   Rejections from model loading, inference, or the file write propagate to
 *   the caller.
 */
async function main(): Promise<void> {
  const items = buildEmbeddingTexts()
  console.log(`Found ${items.length} items to embed.`)

  console.log('Loading all-MiniLM-L6-v2 model...')
  const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')

  const embeddings: Array<{ id: string; embedding: number[] }> = []

  // Items are embedded one at a time so the progress log below stays in order.
  for (const item of items) {
    const output = await extractor(item.text, { pooling: 'mean', normalize: true })
    // Copy the typed array into a plain number[] so JSON.stringify emits a
    // JSON array rather than an index-keyed object.
    const vector = Array.from(output.data as Float32Array)
    embeddings.push({ id: item.id, embedding: vector })
    console.log(` [${embeddings.length}/${items.length}] ${item.id} (${vector.length}d)`)
  }

  const outPath = resolve(import.meta.dirname, '..', 'src', 'data', 'embeddings.json')
  writeFileSync(outPath, JSON.stringify(embeddings, null, 2))
  console.log(`\nWrote ${embeddings.length} embeddings to ${outPath}`)
}
main().catch((err) => {