Files
portfolio/scripts/generate-embeddings.ts
T

31 lines
1.1 KiB
TypeScript

import { writeFileSync } from 'node:fs'
import { resolve } from 'node:path'
import { pipeline } from '@xenova/transformers'
import { buildEmbeddingTexts } from '@/lib/search'
async function main() {
const items = buildEmbeddingTexts()
console.log(`Found ${items.length} items to embed.`)
console.log('Loading all-MiniLM-L6-v2 model...')
const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
const embeddings: Array<{ id: string; embedding: number[] }> = []
for (const item of items) {
const output = await extractor(item.text, { pooling: 'mean', normalize: true })
const vector = Array.from(output.data as Float32Array)
embeddings.push({ id: item.id, embedding: vector })
console.log(` [${embeddings.length}/${items.length}] ${item.id} (${vector.length}d)`)
}
const outPath = resolve(import.meta.dirname, '..', 'src', 'data', 'embeddings.json')
writeFileSync(outPath, JSON.stringify(embeddings, null, 2))
console.log(`\nWrote ${embeddings.length} embeddings to ${outPath}`)
}
main().catch((err) => {
console.error('Failed:', err)
process.exit(1)
})